Index: llvm/lib/CodeGen/RegAllocFast.cpp =================================================================== --- llvm/lib/CodeGen/RegAllocFast.cpp +++ llvm/lib/CodeGen/RegAllocFast.cpp @@ -56,6 +56,10 @@ STATISTIC(NumLoads , "Number of loads added"); STATISTIC(NumCoalesced, "Number of copies coalesced"); +// FIXME: Remove this switch when all testcases are fixed! +static cl::opt IgnoreMissingDefs("rafast-ignore-missing-defs", + cl::Hidden); + static RegisterRegAlloc fastRegAlloc("fast", "fast register allocator", createFastRegisterAllocator); @@ -85,8 +89,9 @@ MachineInstr *LastUse = nullptr; ///< Last instr to use reg. Register VirtReg; ///< Virtual register number. MCPhysReg PhysReg = 0; ///< Currently held here. - unsigned short LastOpNum = 0; ///< OpNum on LastUse. - bool Dirty = false; ///< Register needs spill. + bool LiveOut = false; ///< Register is possibly live out. + bool Reloaded = false; ///< Register was reloaded. + bool Error = false; ///< Could not allocate. explicit LiveReg(Register VirtReg) : VirtReg(VirtReg) {} @@ -101,6 +106,9 @@ LiveRegMap LiveVirtRegs; DenseMap> LiveDbgValueMap; + /// List of DBG_VALUE that we encountered without the vreg being assigned + /// because they were placed after the last use of the vreg. + DenseMap> DanglingDbgValues; /// Has a bit set for every virtual register for which it was determined /// that it is alive across blocks. @@ -112,9 +120,13 @@ /// immediately without checking aliases. regFree, - /// A reserved register has been assigned explicitly (e.g., setting up a - /// call parameter), and it remains reserved until it is used. - regReserved + /// A pre-assigned register has been assigned before register allocation + /// (e.g., setting up a call parameter). + regPreAssigned, + + /// Used temporarily in reloadAtBegin() to mark register units that are + /// live-in to the basic block. 
+ regLiveIn, /// A register state may also be a virtual register number, indication /// that the physical register is currently allocated to a virtual @@ -124,15 +136,17 @@ /// Maps each physical register to a RegUnitState enum or virtual register. std::vector RegUnitStates; - SmallVector VirtDead; SmallVector Coalesced; using RegUnitSet = SparseSet>; /// Set of register units that are used in the current instruction, and so /// cannot be allocated. RegUnitSet UsedInInstr; + RegUnitSet PhysRegUses; + SmallVector DefOperandIndexes; void setPhysRegState(MCPhysReg PhysReg, unsigned NewState); + bool isPhysRegFree(MCPhysReg PhysReg) const; /// Mark a physreg as used in this instruction. void markRegUsedInInstr(MCPhysReg PhysReg) { @@ -141,13 +155,29 @@ } /// Check if a physreg or any of its aliases are used in this instruction. - bool isRegUsedInInstr(MCPhysReg PhysReg) const { - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + bool isRegUsedInInstr(MCPhysReg PhysReg, bool LookAtPhysRegUses) const { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { if (UsedInInstr.count(*Units)) return true; + if (LookAtPhysRegUses && PhysRegUses.count(*Units)) + return true; + } return false; } + /// Mark physical register as being used in a register use operand. + /// This is only used by the special livethrough handling code. + void markPhysRegUsedInInstr(MCPhysReg PhysReg) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + PhysRegUses.insert(*Units); + } + + /// Remove mark of physical register being used in the instruction. 
+ void unmarkRegUsedInInstr(MCPhysReg PhysReg) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + UsedInInstr.erase(*Units); + } + enum : unsigned { spillClean = 50, spillDirty = 100, @@ -177,27 +207,21 @@ bool runOnMachineFunction(MachineFunction &MF) override; void allocateBasicBlock(MachineBasicBlock &MBB); + + void addRegClassDefCounts(std::vector &RegClassDefCounts, + Register Reg) const; + void allocateInstruction(MachineInstr &MI); void handleDebugValue(MachineInstr &MI); - void handleThroughOperands(MachineInstr &MI, - SmallVectorImpl &VirtDead); - bool isLastUseOfLocalReg(const MachineOperand &MO) const; - - void addKillFlag(const LiveReg &LRI); #ifndef NDEBUG bool verifyRegStateMapping(const LiveReg &LR) const; #endif + bool usePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + bool definePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + bool displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg); + void freePhysReg(MCPhysReg PhysReg); - void killVirtReg(LiveReg &LR); - void killVirtReg(Register VirtReg); - void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); - void spillVirtReg(MachineBasicBlock::iterator MI, Register VirtReg); - - void usePhysReg(MachineOperand &MO); - void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, - unsigned NewState); unsigned calcSpillCost(MCPhysReg PhysReg) const; - void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); LiveRegMap::iterator findLiveVirtReg(Register VirtReg) { return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); @@ -207,14 +231,24 @@ return LiveVirtRegs.find(Register::virtReg2Index(VirtReg)); } - void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint); + void assignVirtToPhysReg(MachineInstr &MI, LiveReg &, MCPhysReg PhysReg); + void allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint, + bool LookAtPhysRegUses = false); void allocVirtRegUndef(MachineOperand &MO); - MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, - 
Register Hint); - LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, - Register Hint); - void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut); - bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); + void assignDanglingDebugValues(MachineInstr &Def, Register VirtReg, + MCPhysReg Reg); + void defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg); + void defineVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg, + bool LookAtPhysRegUses = false); + void useVirtReg(MachineInstr &MI, unsigned OpNum, Register VirtReg); + + MachineBasicBlock::iterator + getMBBBeginInsertionPoint(MachineBasicBlock &MBB, + SmallSet &PrologLiveIns) const; + + void reloadAtBegin(MachineBasicBlock &MBB); + void setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg); Register traceCopies(Register VirtReg) const; Register traceCopyChain(Register Reg) const; @@ -243,6 +277,14 @@ RegUnitStates[*UI] = NewState; } +bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const { + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + if (RegUnitStates[*UI] != regFree) + return false; + } + return true; +} + /// This allocates space for the specified virtual register to be held on the /// stack. int RegAllocFast::getStackSpaceFor(Register VirtReg) { @@ -300,7 +342,7 @@ // block. static const unsigned Limit = 8; unsigned C = 0; - for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) { + for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) { if (UseInst.getParent() != MBB || ++C >= Limit) { MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg)); // Cannot be live-out if there are no successors. 
@@ -352,15 +394,19 @@ TII->storeRegToStackSlot(*MBB, Before, AssignedReg, Kill, FI, &RC, TRI); ++NumStores; - // If this register is used by DBG_VALUE then insert new DBG_VALUE to - // identify spilled location as the place to find corresponding variable's - // value. + // When we spill a virtual register, we will have spill instructions behind + // every definition of it, meaning we can switch all the DBG_VALUEs over + // to just reference the stack slot. SmallVectorImpl &LRIDbgValues = LiveDbgValueMap[VirtReg]; for (MachineInstr *DBG : LRIDbgValues) { MachineInstr *NewDV = buildDbgValueForSpill(*MBB, Before, *DBG, FI); assert(NewDV->getParent() == MBB && "dangling parent pointer"); (void)NewDV; LLVM_DEBUG(dbgs() << "Inserting debug info due to spill:\n" << *NewDV); + // Rewrite unassigned dbg_values to use the stack slot. + MachineOperand &MO = DBG->getOperand(0); + if (MO.isReg() && MO.getReg() == 0) + updateDbgValueForSpill(*DBG, FI); } // Now this register is spilled there is should not be any DBG_VALUE // pointing to this register because they are all pointing to spilled value @@ -379,113 +425,75 @@ ++NumLoads; } -/// Return true if MO is the only remaining reference to its virtual register, -/// and it is guaranteed to be a block-local register. -bool RegAllocFast::isLastUseOfLocalReg(const MachineOperand &MO) const { - // If the register has ever been spilled or reloaded, we conservatively assume - // it is a global register used in multiple blocks. - if (StackSlotForVirtReg[MO.getReg()] != -1) - return false; - - // Check that the use/def chain has exactly one operand - MO. - MachineRegisterInfo::reg_nodbg_iterator I = MRI->reg_nodbg_begin(MO.getReg()); - if (&*I != &MO) - return false; - return ++I == MRI->reg_nodbg_end(); -} - -/// Set kill flags on last use of a virtual register. 
-void RegAllocFast::addKillFlag(const LiveReg &LR) { - if (!LR.LastUse) return; - MachineOperand &MO = LR.LastUse->getOperand(LR.LastOpNum); - if (MO.isUse() && !LR.LastUse->isRegTiedToDefOperand(LR.LastOpNum)) { - if (MO.getReg() == LR.PhysReg) - MO.setIsKill(); - // else, don't do anything we are problably redefining a - // subreg of this register and given we don't track which - // lanes are actually dead, we cannot insert a kill flag here. - // Otherwise we may end up in a situation like this: - // ... = (MO) physreg:sub1, implicit killed physreg - // ... <== Here we would allow later pass to reuse physreg:sub1 - // which is potentially wrong. - // LR:sub0 = ... - // ... = LR.sub1 <== This is going to use physreg:sub1 - } -} - -#ifndef NDEBUG -bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const { - for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) { - if (RegUnitStates[*UI] != LR.VirtReg) - return false; - } +/// Get basic block begin insertion point. +/// This is not just MBB.begin() because surprisingly we have EH_LABEL +/// instructions marking the begin of a basic block. This means we must insert +/// new instructions after such labels... +MachineBasicBlock::iterator +RegAllocFast::getMBBBeginInsertionPoint( + MachineBasicBlock &MBB, SmallSet &PrologLiveIns) const { + MachineBasicBlock::iterator I = MBB.begin(); + while (I != MBB.end()) { + if (I->isLabel()) { + ++I; + continue; + } - return true; -} -#endif + // Most reloads should be inserted after prolog instructions. + if (!TII->isBasicBlockPrologue(*I)) + break; -/// Mark virtreg as no longer available. -void RegAllocFast::killVirtReg(LiveReg &LR) { - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); - addKillFlag(LR); - MCPhysReg PhysReg = LR.PhysReg; - setPhysRegState(PhysReg, regFree); - LR.PhysReg = 0; -} + // However if a prolog instruction reads a register that needs to be + // reloaded, the reload should be inserted before the prolog. 
+ for (MachineOperand &MO : I->operands()) { + if (MO.isReg()) + PrologLiveIns.insert(MO.getReg()); + } -/// Mark virtreg as no longer available. -void RegAllocFast::killVirtReg(Register VirtReg) { - assert(Register::isVirtualRegister(VirtReg) && - "killVirtReg needs a virtual register"); - LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - if (LRI != LiveVirtRegs.end() && LRI->PhysReg) - killVirtReg(*LRI); -} + ++I; + } -/// This method spills the value specified by VirtReg into the corresponding -/// stack slot if needed. -void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, - Register VirtReg) { - assert(Register::isVirtualRegister(VirtReg) && - "Spilling a physical register is illegal!"); - LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Spilling unmapped virtual register"); - spillVirtReg(MI, *LRI); + return I; } -/// Do the actual work of spilling. -void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { - assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); - - MCPhysReg PhysReg = LR.PhysReg; +/// Reload all currently assigned virtual registers. +void RegAllocFast::reloadAtBegin(MachineBasicBlock &MBB) { + if (LiveVirtRegs.empty()) + return; - if (LR.Dirty) { - // If this physreg is used by the instruction, we want to kill it on the - // instruction, not on the spill. - bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; - LR.Dirty = false; + for (MachineBasicBlock::RegisterMaskPair P : MBB.liveins()) { + MCPhysReg Reg = P.PhysReg; + // Set state to live-in. This possibly overrides mappings to virtual + // registers but we don't care anymore at this point. + setPhysRegState(Reg, regLiveIn); + } - spill(MI, LR.VirtReg, PhysReg, SpillKill); - if (SpillKill) - LR.LastUse = nullptr; // Don't kill register again - } - killVirtReg(LR); -} + SmallSet PrologLiveIns; -/// Spill all dirty virtregs without killing them. 
-void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) { - if (LiveVirtRegs.empty()) - return; // The LiveRegMap is keyed by an unsigned (the virtreg number), so the order // of spilling here is deterministic, if arbitrary. - for (LiveReg &LR : LiveVirtRegs) { - if (!LR.PhysReg) + MachineBasicBlock::iterator InsertBefore + = getMBBBeginInsertionPoint(MBB, PrologLiveIns); + for (const LiveReg &LR : LiveVirtRegs) { + MCPhysReg PhysReg = LR.PhysReg; + if (PhysReg == 0) continue; - if (OnlyLiveOut && !mayLiveOut(LR.VirtReg)) + + unsigned FirstUnit = *MCRegUnitIterator(PhysReg, TRI); + if (RegUnitStates[FirstUnit] == regLiveIn) continue; - spillVirtReg(MI, LR); + + assert((&MBB != &MBB.getParent()->front() || IgnoreMissingDefs) && + "no reload in start block. Missing vreg def?"); + + if (PrologLiveIns.count(PhysReg)) { + // FIXME: Theoretically this should use an insert point skipping labels + // but I'm not sure how labels should interact with prolog instruction + // that need reloads. + reload(MBB.begin(), LR.VirtReg, PhysReg); + } else + reload(InsertBefore, LR.VirtReg, PhysReg); } LiveVirtRegs.clear(); } @@ -493,51 +501,74 @@ /// Handle the direct use of a physical register. Check that the register is /// not used by a virtreg. Kill the physreg, marking it free. This may add /// implicit kills to MO->getParent() and invalidate MO. -void RegAllocFast::usePhysReg(MachineOperand &MO) { - // Ignore undef uses. 
- if (MO.isUndef()) - return; - - Register PhysReg = MO.getReg(); - assert(PhysReg.isPhysical() && "Bad usePhysReg operand"); - - markRegUsedInInstr(PhysReg); - - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (RegUnitStates[*UI]) { - case regReserved: - RegUnitStates[*UI] = regFree; - LLVM_FALLTHROUGH; - case regFree: - break; - default: - llvm_unreachable("Unexpected reg unit state"); - } - } +bool RegAllocFast::usePhysReg(MachineInstr &MI, MCPhysReg Reg) { + assert(Register::isPhysicalRegister(Reg) && "expected physreg"); + bool displacedAny = displacePhysReg(MI, Reg); + setPhysRegState(Reg, regPreAssigned); + markRegUsedInInstr(Reg); + return displacedAny; +} - // All aliases are disabled, bring register into working set. - setPhysRegState(PhysReg, regFree); - MO.setIsKill(); +bool RegAllocFast::definePhysReg(MachineInstr &MI, MCPhysReg Reg) { + bool displacedAny = displacePhysReg(MI, Reg); + setPhysRegState(Reg, regPreAssigned); + return displacedAny; } /// Mark PhysReg as reserved or free after spilling any virtregs. This is very /// similar to defineVirtReg except the physreg is reserved instead of /// allocated. 
-void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, - MCPhysReg PhysReg, unsigned NewState) { +bool RegAllocFast::displacePhysReg(MachineInstr &MI, MCPhysReg PhysReg) { + bool displacedAny = false; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { - switch (unsigned VirtReg = RegUnitStates[*UI]) { - default: - spillVirtReg(MI, VirtReg); + unsigned Unit = *UI; + switch (unsigned VirtReg = RegUnitStates[Unit]) { + default: { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end() && "datastructures in sync"); + MachineBasicBlock::iterator ReloadBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + reload(ReloadBefore, VirtReg, LRI->PhysReg); + + setPhysRegState(LRI->PhysReg, regFree); + LRI->PhysReg = 0; + LRI->Reloaded = true; + displacedAny = true; + break; + } + case regPreAssigned: + RegUnitStates[Unit] = regFree; + displacedAny = true; break; case regFree: - case regReserved: break; } } + return displacedAny; +} - markRegUsedInInstr(PhysReg); - setPhysRegState(PhysReg, NewState); +void RegAllocFast::freePhysReg(MCPhysReg PhysReg) { + LLVM_DEBUG(dbgs() << "Freeing " << printReg(PhysReg, TRI) << ':'); + + unsigned FirstUnit = *MCRegUnitIterator(PhysReg, TRI); + switch (unsigned VirtReg = RegUnitStates[FirstUnit]) { + case regFree: + LLVM_DEBUG(dbgs() << '\n'); + return; + case regPreAssigned: + LLVM_DEBUG(dbgs() << '\n'); + setPhysRegState(PhysReg, regFree); + return; + default: { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + assert(LRI != LiveVirtRegs.end()); + LLVM_DEBUG(dbgs() << ' ' << printReg(LRI->VirtReg, TRI) << '\n'); + setPhysRegState(LRI->PhysReg, regFree); + LRI->PhysReg = 0; + } + return; + } } /// Return the cost of spilling clearing out PhysReg and aliases so it is free @@ -545,35 +576,61 @@ /// disabled - it can be allocated directly. /// \returns spillImpossible when PhysReg or an alias can't be spilled. 
unsigned RegAllocFast::calcSpillCost(MCPhysReg PhysReg) const { - if (isRegUsedInInstr(PhysReg)) { - LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) - << " is already used in instr.\n"); - return spillImpossible; - } - for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { switch (unsigned VirtReg = RegUnitStates[*UI]) { case regFree: break; - case regReserved: - LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " - << printReg(PhysReg, TRI) << " is reserved already.\n"); + case regPreAssigned: + LLVM_DEBUG(dbgs() << "Cannot spill pre-assigned " + << printReg(PhysReg, TRI) << '\n'); return spillImpossible; default: { - LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Missing VirtReg entry"); - return LRI->Dirty ? spillDirty : spillClean; + bool SureSpill = StackSlotForVirtReg[VirtReg] != -1 || + findLiveVirtReg(VirtReg)->LiveOut; + return SureSpill ? spillClean : spillDirty; } } } return 0; } +void RegAllocFast::assignDanglingDebugValues(MachineInstr &Definition, + Register VirtReg, MCPhysReg Reg) { + auto UDBGValIter = DanglingDbgValues.find(VirtReg); + if (UDBGValIter == DanglingDbgValues.end()) + return; + + SmallVectorImpl &Dangling = UDBGValIter->second; + for (MachineInstr *DbgValue : Dangling) { + assert(DbgValue->isDebugValue()); + MachineOperand &MO = DbgValue->getOperand(0); + if (!MO.isReg()) + continue; + + // Test whether the physreg survives from the definition to the DBG_VALUE. 
+ MCPhysReg SetToReg = Reg; + unsigned Limit = 20; + for (MachineBasicBlock::iterator I = std::next(Definition.getIterator()), + E = DbgValue->getIterator(); I != E; ++I) { + if (I->modifiesRegister(Reg, TRI) || --Limit == 0) { + LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue + << '\n'); + SetToReg = 0; + break; + } + } + MO.setReg(SetToReg); + if (SetToReg != 0) + MO.setIsRenamable(); + } + Dangling.clear(); +} + /// This method updates local state so that we know that PhysReg is the /// proper container for VirtReg now. The physical register must not be used /// for anything else when this is called. -void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) { +void RegAllocFast::assignVirtToPhysReg(MachineInstr &AtMI, LiveReg &LR, + MCPhysReg PhysReg) { Register VirtReg = LR.VirtReg; LLVM_DEBUG(dbgs() << "Assigning " << printReg(VirtReg, TRI) << " to " << printReg(PhysReg, TRI) << '\n'); @@ -581,6 +638,8 @@ assert(PhysReg != 0 && "Trying to assign no register"); LR.PhysReg = PhysReg; setPhysRegState(PhysReg, VirtReg); + + assignDanglingDebugValues(AtMI, VirtReg, PhysReg); } static bool isCoalescable(const MachineInstr &MI) { @@ -624,11 +683,10 @@ } /// Allocates a physical register for VirtReg. -void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, Register Hint0) { +void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, + Register Hint0, bool LookAtPhysRegUses) { const Register VirtReg = LR.VirtReg; - - assert(Register::isVirtualRegister(VirtReg) && - "Can only allocate virtual registers"); + assert(LR.PhysReg == 0); const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg) @@ -636,41 +694,36 @@ << " with hint " << printReg(Hint0, TRI) << '\n'); // Take hint when possible. - if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && - RC.contains(Hint0)) { - // Ignore the hint if we would have to spill a dirty register. 
- unsigned Cost = calcSpillCost(Hint0); - if (Cost < spillDirty) { + if (Hint0.isPhysical() && MRI->isAllocatable(Hint0) && RC.contains(Hint0) && + !isRegUsedInInstr(Hint0, LookAtPhysRegUses)) { + // Take hint if the register is currently free. + if (isPhysRegFree(Hint0)) { LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI) << '\n'); - if (Cost) - definePhysReg(MI, Hint0, regFree); - assignVirtToPhysReg(LR, Hint0); + assignVirtToPhysReg(MI, LR, Hint0); return; } else { - LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI) - << "occupied\n"); + LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint0, TRI) + << " occupied\n"); } } else { Hint0 = Register(); } + // Try other hint. Register Hint1 = traceCopies(VirtReg); - if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && - RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) { - // Ignore the hint if we would have to spill a dirty register. - unsigned Cost = calcSpillCost(Hint1); - if (Cost < spillDirty) { + if (Hint1.isPhysical() && MRI->isAllocatable(Hint1) && RC.contains(Hint1) && + !isRegUsedInInstr(Hint1, LookAtPhysRegUses)) { + // Take hint if the register is currently free. 
+ if (isPhysRegFree(Hint1)) { LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI) - << '\n'); - if (Cost) - definePhysReg(MI, Hint1, regFree); - assignVirtToPhysReg(LR, Hint1); + << '\n'); + assignVirtToPhysReg(MI, LR, Hint1); return; } else { - LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI) - << "occupied\n"); + LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint1, TRI) + << " occupied\n"); } } else { Hint1 = Register(); @@ -681,15 +734,20 @@ ArrayRef AllocationOrder = RegClassInfo.getOrder(&RC); for (MCPhysReg PhysReg : AllocationOrder) { LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' '); + if (isRegUsedInInstr(PhysReg, LookAtPhysRegUses)) { + LLVM_DEBUG(dbgs() << "already used in instr.\n"); + continue; + } + unsigned Cost = calcSpillCost(PhysReg); LLVM_DEBUG(dbgs() << "Cost: " << Cost << " BestCost: " << BestCost << '\n'); // Immediate take a register with cost 0. if (Cost == 0) { - assignVirtToPhysReg(LR, PhysReg); + assignVirtToPhysReg(MI, LR, PhysReg); return; } - if (PhysReg == Hint1 || PhysReg == Hint0) + if (PhysReg == Hint0 || PhysReg == Hint1) Cost -= spillPrefBonus; if (Cost < BestCost) { @@ -705,13 +763,14 @@ MI.emitError("inline assembly requires more registers than available"); else MI.emitError("ran out of registers during register allocation"); - definePhysReg(MI, *AllocationOrder.begin(), regFree); - assignVirtToPhysReg(LR, *AllocationOrder.begin()); + + LR.Error = true; + LR.PhysReg = 0; return; } - definePhysReg(MI, BestReg, regFree); - assignVirtToPhysReg(LR, BestReg); + displacePhysReg(MI, BestReg); + assignVirtToPhysReg(MI, LR, BestReg); } void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) { @@ -739,212 +798,166 @@ MO.setIsRenamable(true); } -/// Allocates a register for VirtReg and mark it as dirty. 
-MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, - Register VirtReg, Register Hint) { - assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); +/// Variation of defineVirtReg() with special handling for livethrough regs +/// (tied or earlyclobber) that may interfere with preassigned uses. +void RegAllocFast::defineLiveThroughVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg) { + LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg); + if (LRI != LiveVirtRegs.end()) { + MCPhysReg PrevReg = LRI->PhysReg; + if (PrevReg != 0 && isRegUsedInInstr(PrevReg, true)) { + LLVM_DEBUG(dbgs() << "Need new assignment for " << printReg(PrevReg, TRI) + << " (tied/earlyclobber resolution)\n"); + freePhysReg(PrevReg); + LRI->PhysReg = 0; + allocVirtReg(MI, *LRI, 0, true); + MachineBasicBlock::iterator InsertBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + LLVM_DEBUG(dbgs() << "Copy " << printReg(LRI->PhysReg, TRI) << " to " + << printReg(PrevReg, TRI) << '\n'); + BuildMI(*MBB, InsertBefore, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY), PrevReg) + .addReg(LRI->PhysReg, llvm::RegState::Kill); + } + MachineOperand &MO = MI.getOperand(OpNum); + if (MO.getSubReg() && !MO.isUndef()) { + LRI->LastUse = &MI; + } + } + return defineVirtReg(MI, OpNum, VirtReg, true); +} + +/// Allocates a register for VirtReg definition. Typically the register is +/// already assigned from a use of the virtreg, however we still need to +/// perform an allocation if: +/// - It is a dead definition without any uses. +/// - The value is live out and all uses are in different basic blocks. 
+void RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg, bool LookAtPhysRegUses) { + assert(VirtReg.isVirtual() && "Not a virtual register"); + MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); - if (!LRI->PhysReg) { - // If there is no hint, peek at the only use of this register. - if ((!Hint || !Hint.isPhysical()) && - MRI->hasOneNonDBGUse(VirtReg)) { - const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg); - // It's a copy, use the destination register as a hint. - if (UseMI.isCopyLike()) - Hint = UseMI.getOperand(0).getReg(); + if (New) { + if (!MO.isDead()) { + if (mayLiveOut(VirtReg)) { + LRI->LiveOut = true; + } else { + // It is a dead def without the dead flag; add the flag now. + MO.setIsDead(true); + } } - allocVirtReg(MI, *LRI, Hint); - } else if (LRI->LastUse) { - // Redefining a live register - kill at the last use, unless it is this - // instruction defining VirtReg multiple times. 
- if (LRI->LastUse != &MI || LRI->LastUse->getOperand(LRI->LastOpNum).isUse()) - addKillFlag(*LRI); } - assert(LRI->PhysReg && "Register not assigned"); - LRI->LastUse = &MI; - LRI->LastOpNum = OpNum; - LRI->Dirty = true; - markRegUsedInInstr(LRI->PhysReg); - return LRI->PhysReg; + if (LRI->PhysReg == 0) + allocVirtReg(MI, *LRI, 0, LookAtPhysRegUses); + else { + assert(!isRegUsedInInstr(LRI->PhysReg, LookAtPhysRegUses) && + "TODO: preassign mismatch"); + LLVM_DEBUG(dbgs() << "In def of " << printReg(VirtReg, TRI) + << " use existing assignment to " + << printReg(LRI->PhysReg, TRI) << '\n'); + } + + MCPhysReg PhysReg = LRI->PhysReg; + assert(PhysReg != 0 && "Register not assigned"); + if (LRI->Reloaded || LRI->LiveOut) { + if (!MI.isImplicitDef()) { + MachineBasicBlock::iterator SpillBefore = + std::next((MachineBasicBlock::iterator)MI.getIterator()); + LLVM_DEBUG(dbgs() << "Spill Reason: LO: " << LRI->LiveOut << " RL: " + << LRI->Reloaded << '\n'); + bool Kill = LRI->LastUse == nullptr; + spill(SpillBefore, VirtReg, PhysReg, Kill); + LRI->LastUse = nullptr; + } + LRI->LiveOut = false; + LRI->Reloaded = false; + } + markRegUsedInInstr(PhysReg); + setPhysReg(MI, MO, PhysReg); } -/// Make sure VirtReg is available in a physreg and return it. -RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI, - unsigned OpNum, - Register VirtReg, - Register Hint) { - assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register"); +/// Allocates a register for a VirtReg use. 
+void RegAllocFast::useVirtReg(MachineInstr &MI, unsigned OpNum, + Register VirtReg) { + assert(VirtReg.isVirtual() && "Not a virtual register"); + MachineOperand &MO = MI.getOperand(OpNum); LiveRegMap::iterator LRI; bool New; std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg)); - MachineOperand &MO = MI.getOperand(OpNum); - if (!LRI->PhysReg) { - allocVirtReg(MI, *LRI, Hint); - reload(MI, VirtReg, LRI->PhysReg); - } else if (LRI->Dirty) { - if (isLastUseOfLocalReg(MO)) { - LLVM_DEBUG(dbgs() << "Killing last use: " << MO << '\n'); - if (MO.isUse()) - MO.setIsKill(); - else - MO.setIsDead(); - } else if (MO.isKill()) { - LLVM_DEBUG(dbgs() << "Clearing dubious kill: " << MO << '\n'); - MO.setIsKill(false); - } else if (MO.isDead()) { - LLVM_DEBUG(dbgs() << "Clearing dubious dead: " << MO << '\n'); - MO.setIsDead(false); + if (New) { + MachineOperand &MO = MI.getOperand(OpNum); + if (!MO.isKill()) { + if (mayLiveOut(VirtReg)) { + LRI->LiveOut = true; + } else { + // It is a last (killing) use without the kill flag; add the flag now. + MO.setIsKill(true); + } } - } else if (MO.isKill()) { - // We must remove kill flags from uses of reloaded registers because the - // register would be killed immediately, and there might be a second use: - // %foo = OR killed %x, %x - // This would cause a second reload of %x into a different register. - LLVM_DEBUG(dbgs() << "Clearing clean kill: " << MO << '\n'); - MO.setIsKill(false); - } else if (MO.isDead()) { - LLVM_DEBUG(dbgs() << "Clearing clean dead: " << MO << '\n'); - MO.setIsDead(false); + } else { + assert((!MO.isKill() || LRI->LastUse == &MI) && "Invalid kill flag"); } - assert(LRI->PhysReg && "Register not assigned"); + + // If necessary allocate a register. 
+ if (LRI->PhysReg == 0) { + assert(!MO.isTied() && "tied op should be allocated"); + Register Hint; + if (MI.isCopy() && MI.getOperand(1).getSubReg() == 0) { + Hint = MI.getOperand(0).getReg(); + assert(Hint.isPhysical() && + "Copy destination should already be assigned"); + } + allocVirtReg(MI, *LRI, Hint, false); + if (LRI->Error) { + const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg); + ArrayRef AllocationOrder = RegClassInfo.getOrder(&RC); + setPhysReg(MI, MO, *AllocationOrder.begin()); + return; + } + } + LRI->LastUse = &MI; - LRI->LastOpNum = OpNum; markRegUsedInInstr(LRI->PhysReg); - return *LRI; + setPhysReg(MI, MO, LRI->PhysReg); } /// Changes operand OpNum in MI the refer the PhysReg, considering subregs. This /// may invalidate any operand pointers. Return true if the operand kills its /// register. -bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, +void RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg) { - bool Dead = MO.isDead(); if (!MO.getSubReg()) { MO.setReg(PhysReg); MO.setIsRenamable(true); - return MO.isKill() || Dead; + return; } // Handle subregister index. MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : Register()); MO.setIsRenamable(true); - MO.setSubReg(0); + // Note: We leave the subreg number around a little longer in case of defs. + // This is so that the register freeing logic in allocateInstruction can still + // recognize this as subregister defs. The code there will clear the number. + if (!MO.isDef()) + MO.setSubReg(0); // A kill flag implies killing the full register. Add corresponding super // register kill. if (MO.isKill()) { MI.addRegisterKilled(PhysReg, TRI, true); - return true; + return; } // A <def,read-undef> of a sub-register requires an implicit def of the full // register. 
- if (MO.isDef() && MO.isUndef()) - MI.addRegisterDefined(PhysReg, TRI); - - return Dead; -} - -// Handles special instruction operand like early clobbers and tied ops when -// there are additional physreg defines. -void RegAllocFast::handleThroughOperands(MachineInstr &MI, - SmallVectorImpl &VirtDead) { - LLVM_DEBUG(dbgs() << "Scanning for through registers:"); - SmallSet ThroughRegs; - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Reg.isVirtual()) - continue; - if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) || - (MO.getSubReg() && MI.readsVirtualRegister(Reg))) { - if (ThroughRegs.insert(Reg).second) - LLVM_DEBUG(dbgs() << ' ' << printReg(Reg)); - } - } - - // If any physreg defines collide with preallocated through registers, - // we must spill and reallocate. - LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || !MO.isDef()) continue; - Register Reg = MO.getReg(); - if (!Reg || !Reg.isPhysical()) - continue; - markRegUsedInInstr(Reg); - - for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) { - if (!ThroughRegs.count(RegUnitStates[*UI])) - continue; - - // Need to spill any aliasing registers. 
- for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { - for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) { - definePhysReg(MI, *SI, regFree); - } - } - } - } - - SmallVector PartialDefs; - LLVM_DEBUG(dbgs() << "Allocating tied uses.\n"); - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) - continue; - if (MO.isUse()) { - if (!MO.isTied()) continue; - LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO - << ") is tied to operand " << MI.findTiedOperandIdx(I) - << ".\n"); - LiveReg &LR = reloadVirtReg(MI, I, Reg, 0); - MCPhysReg PhysReg = LR.PhysReg; - setPhysReg(MI, MO, PhysReg); - // Note: we don't update the def operand yet. That would cause the normal - // def-scan to attempt spilling. - } else if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) { - LLVM_DEBUG(dbgs() << "Partial redefine: " << MO << '\n'); - // Reload the register, but don't assign to the operand just yet. - // That would confuse the later phys-def processing pass. - LiveReg &LR = reloadVirtReg(MI, I, Reg, 0); - PartialDefs.push_back(LR.PhysReg); - } - } - - LLVM_DEBUG(dbgs() << "Allocating early clobbers.\n"); - for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Register::isVirtualRegister(Reg)) - continue; - if (!MO.isEarlyClobber()) - continue; - // Note: defineVirtReg may invalidate MO. - MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, 0); - if (setPhysReg(MI, MI.getOperand(I), PhysReg)) - VirtDead.push_back(Reg); - } - - // Restore UsedInInstr to a state usable for allocating normal virtual uses. 
- UsedInInstr.clear(); - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; - Register Reg = MO.getReg(); - if (!Reg || !Reg.isPhysical()) - continue; - LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI) - << " as used in instr\n"); - markRegUsedInInstr(Reg); + if (MO.isDef() && MO.isUndef()) { + if (MO.isDead()) + MI.addRegisterDead(PhysReg, TRI, true); + else + MI.addRegisterDefined(PhysReg, TRI); } - - // Also mark PartialDefs as used to avoid reallocation. - for (Register PartialDef : PartialDefs) - markRegUsedInInstr(PartialDef); } #ifndef NDEBUG @@ -955,15 +968,21 @@ switch (unsigned VirtReg = RegUnitStates[Unit]) { case regFree: break; - case regReserved: + case regPreAssigned: dbgs() << " " << printRegUnit(Unit, TRI) << "[P]"; break; + case regLiveIn: + llvm_unreachable("Should not have regLiveIn in map"); default: { dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg); LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry"); - if (I->Dirty) - dbgs() << "[D]"; + if (I->LiveOut || I->Reloaded) { + dbgs() << '['; + if (I->LiveOut) dbgs() << 'O'; + if (I->Reloaded) dbgs() << 'R'; + dbgs() << ']'; + } assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present"); break; } @@ -986,111 +1005,277 @@ } #endif -void RegAllocFast::allocateInstruction(MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - - // If this is a copy, we may be able to coalesce. 
- Register CopySrcReg; - Register CopyDstReg; - unsigned CopySrcSub = 0; - unsigned CopyDstSub = 0; - if (MI.isCopy()) { - CopyDstReg = MI.getOperand(0).getReg(); - CopySrcReg = MI.getOperand(1).getReg(); - CopyDstSub = MI.getOperand(0).getSubReg(); - CopySrcSub = MI.getOperand(1).getSubReg(); +/// Count number of defs consumed from each register class by \p Reg +void RegAllocFast::addRegClassDefCounts(std::vector &RegClassDefCounts, + Register Reg) const { + assert(RegClassDefCounts.size() == TRI->getNumRegClasses()); + + if (Reg.isVirtual()) { + const TargetRegisterClass *OpRC = MRI->getRegClass(Reg); + for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses(); + RCIdx != RCIdxEnd; ++RCIdx) { + const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx); + // FIXME: Consider aliasing sub/super registers. + if (OpRC->hasSubClassEq(IdxRC)) + ++RegClassDefCounts[RCIdx]; + } + + return; } - // Track registers used by instruction. + for (unsigned RCIdx = 0, RCIdxEnd = TRI->getNumRegClasses(); + RCIdx != RCIdxEnd; ++RCIdx) { + const TargetRegisterClass *IdxRC = TRI->getRegClass(RCIdx); + for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) { + if (IdxRC->contains(*Alias)) { + ++RegClassDefCounts[RCIdx]; + break; + } + } + } +} + +void RegAllocFast::allocateInstruction(MachineInstr &MI) { + // The basic algorithm here is: + // 1. Mark registers of def operands as free + // 2. Allocate registers to use operands and place reload instructions for + // registers displaced by the allocation. + // + // However we need to handle some corner cases: + // - pre-assigned defs and uses need to be handled before the other def/use + // operands are processed to avoid the allocation heuristics clashing with + // the pre-assignment. + // - The "free def operands" step has to come last instead of first for tied + // operands and early-clobbers. + UsedInInstr.clear(); - // First scan. - // Mark physreg uses and early clobbers as used. 
- // Find the end of the virtreg operands - unsigned VirtOpEnd = 0; - bool hasTiedOps = false; - bool hasEarlyClobbers = false; - bool hasPartialRedefs = false; - bool hasPhysDefs = false; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - // Make sure MRI knows about registers clobbered by regmasks. - if (MO.isRegMask()) { - MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); - continue; + // Scan for special cases; Apply pre-assigned register defs to state. + bool HasPhysRegUse = false; + bool HasRegMask = false; + bool HasVRegDef = false; + bool HasDef = false; + bool HasEarlyClobber = false; + bool NeedToAssignLiveThroughs = false; + for (MachineOperand &MO : MI.operands()) { + if (MO.isReg()) { + Register Reg = MO.getReg(); + if (Reg.isVirtual()) { + if (MO.isDef()) { + HasDef = true; + HasVRegDef = true; + if (MO.isEarlyClobber()) { + HasEarlyClobber = true; + NeedToAssignLiveThroughs = true; + } + if (MO.isTied() || (MO.getSubReg() != 0 && !MO.isUndef())) + NeedToAssignLiveThroughs = true; + } + } else if (Reg.isPhysical()) { + if (!MRI->isReserved(Reg)) { + if (MO.isDef()) { + HasDef = true; + bool displacedAny = definePhysReg(MI, Reg); + if (MO.isEarlyClobber()) + HasEarlyClobber = true; + if (!displacedAny) + MO.setIsDead(true); + } + if (MO.readsReg()) + HasPhysRegUse = true; + } + } + } else if (MO.isRegMask()) { + HasRegMask = true; } - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Reg) continue; - if (Register::isVirtualRegister(Reg)) { - VirtOpEnd = i+1; - if (MO.isUse()) { - hasTiedOps = hasTiedOps || - MCID.getOperandConstraint(i, MCOI::TIED_TO) != -1; + } + + // Allocate virtreg defs. + if (HasDef) { + if (HasVRegDef) { + // Special handling for early clobbers, tied operands or subregister defs: + // Compared to "normal" defs these: + // - Must not use a register that is pre-assigned for a use operand. 
+ // - In order to solve tricky inline assembly constraints we change the + // heuristic to figure out a good operand order before doing + // assignments. + if (NeedToAssignLiveThroughs) { + DefOperandIndexes.clear(); + PhysRegUses.clear(); + + // Track number of defs which may consume a register from the class. + std::vector RegClassDefCounts(TRI->getNumRegClasses(), 0); + assert(RegClassDefCounts[0] == 0); + + LLVM_DEBUG(dbgs() << "Need to assign livethroughs\n"); + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + const MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg()) + continue; + Register Reg = MO.getReg(); + if (MO.readsReg()) { + if (Reg.isPhysical()) { + LLVM_DEBUG(dbgs() << "mark extra used: " << printReg(Reg, TRI) + << '\n'); + markPhysRegUsedInInstr(Reg); + } + } + + if (MO.isDef()) { + if (Reg.isVirtual()) + DefOperandIndexes.push_back(I); + + addRegClassDefCounts(RegClassDefCounts, Reg); + } + } + + llvm::sort(DefOperandIndexes.begin(), DefOperandIndexes.end(), + [&](uint16_t I0, uint16_t I1) { + const MachineOperand &MO0 = MI.getOperand(I0); + const MachineOperand &MO1 = MI.getOperand(I1); + Register Reg0 = MO0.getReg(); + Register Reg1 = MO1.getReg(); + const TargetRegisterClass &RC0 = *MRI->getRegClass(Reg0); + const TargetRegisterClass &RC1 = *MRI->getRegClass(Reg1); + + // Identify regclass that are easy to use up completely just in this + // instruction. + unsigned ClassSize0 = RegClassInfo.getOrder(&RC0).size(); + unsigned ClassSize1 = RegClassInfo.getOrder(&RC1).size(); + + bool SmallClass0 = ClassSize0 < RegClassDefCounts[RC0.getID()]; + bool SmallClass1 = ClassSize1 < RegClassDefCounts[RC1.getID()]; + if (SmallClass0 > SmallClass1) + return true; + if (SmallClass0 < SmallClass1) + return false; + + // Allocate early clobbers and livethrough operands first. 
+ bool Livethrough0 = MO0.isEarlyClobber() || MO0.isTied() || + (MO0.getSubReg() == 0 && !MO0.isUndef()); + bool Livethrough1 = MO1.isEarlyClobber() || MO1.isTied() || + (MO1.getSubReg() == 0 && !MO1.isUndef()); + if (Livethrough0 > Livethrough1) + return true; + if (Livethrough0 < Livethrough1) + return false; + + // Tie-break rule: operand index. + return I0 < I1; + }); + + for (uint16_t OpIdx : DefOperandIndexes) { + MachineOperand &MO = MI.getOperand(OpIdx); + LLVM_DEBUG(dbgs() << "Allocating " << MO << '\n'); + unsigned Reg = MO.getReg(); + if (MO.isEarlyClobber() || MO.isTied() || + (MO.getSubReg() && !MO.isUndef())) { + defineLiveThroughVirtReg(MI, OpIdx, Reg); + } else { + defineVirtReg(MI, OpIdx, Reg); + } + } } else { - if (MO.isEarlyClobber()) - hasEarlyClobbers = true; - if (MO.getSubReg() && MI.readsVirtualRegister(Reg)) - hasPartialRedefs = true; + // Assign virtual register defs. + for (unsigned I = 0, E = MI.getNumOperands(); I < E; ++I) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef()) + continue; + Register Reg = MO.getReg(); + if (Reg.isVirtual()) + defineVirtReg(MI, I, Reg); + } } - continue; } - if (!MRI->isAllocatable(Reg)) continue; - if (MO.isUse()) { - usePhysReg(MO); - } else if (MO.isEarlyClobber()) { - definePhysReg(MI, Reg, - (MO.isImplicit() || MO.isDead()) ? regFree : regReserved); - hasEarlyClobbers = true; - } else - hasPhysDefs = true; + + // Free registers occupied by defs. + // Iterate operands in reverse order, so we see the implicit super register + // defs first (we added them earlier in case of ). + for (unsigned I = MI.getNumOperands(); I-- > 0;) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef()) + continue; + + // subreg defs don't free the full register. We left the subreg number + // around as a marker in setPhysReg() to recognize this case here. + if (MO.getSubReg() != 0) { + MO.setSubReg(0); + continue; + } + + // Do not free tied operands and early clobbers. 
+ if (MO.isTied() || MO.isEarlyClobber()) + continue; + Register Reg = MO.getReg(); + if (!Reg) + continue; + assert(Reg.isPhysical()); + if (MRI->isReserved(Reg)) + continue; + freePhysReg(Reg); + unmarkRegUsedInInstr(Reg); + } } - // The instruction may have virtual register operands that must be allocated - // the same register at use-time and def-time: early clobbers and tied - // operands. If there are also physical defs, these registers must avoid - // both physical defs and uses, making them more constrained than normal - // operands. - // Similarly, if there are multiple defs and tied operands, we must make - // sure the same register is allocated to uses and defs. - // We didn't detect inline asm tied operands above, so just make this extra - // pass for all inline asm. - if (MI.isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || - (hasTiedOps && (hasPhysDefs || MCID.getNumDefs() > 1))) { - handleThroughOperands(MI, VirtDead); - // Don't attempt coalescing when we have funny stuff going on. - CopyDstReg = Register(); - // Pretend we have early clobbers so the use operands get marked below. - // This is not necessary for the common case of a single tied use. - hasEarlyClobbers = true; + // Displace clobbered registers. + if (HasRegMask) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) { + // MRI bookkeeping. + MRI->addPhysRegsUsedFromRegMask(MO.getRegMask()); + + // Displace clobbered registers. + const uint32_t *Mask = MO.getRegMask(); + for (LiveRegMap::iterator LRI = LiveVirtRegs.begin(), + LRIE = LiveVirtRegs.end(); LRI != LRIE; ++LRI) { + MCPhysReg PhysReg = LRI->PhysReg; + if (PhysReg != 0 && MachineOperand::clobbersPhysReg(Mask, PhysReg)) + displacePhysReg(MI, PhysReg); + } + } + } + } + + // Apply pre-assigned register uses to state. 
+ if (HasPhysRegUse) { + for (MachineOperand &MO : MI.operands()) { + if (!MO.isReg() || !MO.readsReg()) + continue; + Register Reg = MO.getReg(); + if (!Reg.isPhysical()) + continue; + if (MRI->isReserved(Reg)) + continue; + bool displacedAny = usePhysReg(MI, Reg); + if (!displacedAny && !MRI->isReserved(Reg)) + MO.setIsKill(true); + } } - // Second scan. - // Allocate virtreg uses. + // Allocate virtreg uses and insert reloads as necessary. bool HasUndefUse = false; - for (unsigned I = 0; I != VirtOpEnd; ++I) { + for (unsigned I = 0; I < MI.getNumOperands(); ++I) { MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg()) continue; + if (!MO.isReg() || !MO.isUse()) + continue; Register Reg = MO.getReg(); if (!Reg.isVirtual()) continue; - if (MO.isUse()) { - if (MO.isUndef()) { - HasUndefUse = true; - // There is no need to allocate a register for an undef use. - continue; - } - // Populate MayLiveAcrossBlocks in case the use block is allocated before - // the def block (removing the vreg uses). - mayLiveIn(Reg); - - LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg); - MCPhysReg PhysReg = LR.PhysReg; - CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0; - if (setPhysReg(MI, MO, PhysReg)) - killVirtReg(LR); + if (MO.isUndef()) { + HasUndefUse = true; + continue; } + + + // Populate MayLiveAcrossBlocks in case the use block is allocated before + // the def block (removing the vreg uses). + mayLiveIn(Reg); + + + assert(!MO.isInternalRead() && "Bundles not supported"); + assert(MO.readsReg() && "reading use"); + useVirtReg(MI, I, Reg); } // Allocate undef operands. This is a separate step because in a situation @@ -1109,76 +1294,40 @@ } } - // Track registers defined by instruction - early clobbers and tied uses at - // this point. - UsedInInstr.clear(); - if (hasEarlyClobbers) { - for (const MachineOperand &MO : MI.operands()) { - if (!MO.isReg()) continue; - Register Reg = MO.getReg(); - if (!Reg || !Reg.isPhysical()) + // Free early clobbers. 
+ if (HasEarlyClobber) { + for (unsigned I = MI.getNumOperands(); I-- > 0; ) { + MachineOperand &MO = MI.getOperand(I); + if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber()) continue; - // Look for physreg defs and tied uses. - if (!MO.isDef() && !MO.isTied()) continue; - markRegUsedInInstr(Reg); - } - } - - unsigned DefOpEnd = MI.getNumOperands(); - if (MI.isCall()) { - // Spill all virtregs before a call. This serves one purpose: If an - // exception is thrown, the landing pad is going to expect to find - // registers in their spill slots. - // Note: although this is appealing to just consider all definitions - // as call-clobbered, this is not correct because some of those - // definitions may be used later on and we do not want to reuse - // those for virtual registers in between. - LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n"); - spillAll(MI, /*OnlyLiveOut*/ false); - } - - // Third scan. - // Mark all physreg defs as used before allocating virtreg defs. - for (unsigned I = 0; I != DefOpEnd; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) - continue; - Register Reg = MO.getReg(); - - if (!Reg || !Reg.isPhysical() || !MRI->isAllocatable(Reg)) - continue; - definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); - } + // subreg defs don't free the full register. We left the subreg number + // around as a marker in setPhysReg() to recognize this case here. + if (MO.getSubReg() != 0) { + MO.setSubReg(0); + continue; + } - // Fourth scan. - // Allocate defs and collect dead defs. 
- for (unsigned I = 0; I != DefOpEnd; ++I) { - const MachineOperand &MO = MI.getOperand(I); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) - continue; - Register Reg = MO.getReg(); + Register Reg = MO.getReg(); + if (!Reg) + continue; + assert(Reg.isPhysical() && "should have register assigned"); + + // We sometimes get odd situations like: + // early-clobber %x0 = INSTRUCTION %x0 + // which is semantically questionable as the early-clobber should + // apply before the use. But in practice we consider the use to + // happen before the early clobber now. Don't free the early clobber + // register in this case. + if (MI.readsRegister(Reg, TRI)) + continue; - // We have already dealt with phys regs in the previous scan. - if (Reg.isPhysical()) - continue; - MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg); - if (setPhysReg(MI, MI.getOperand(I), PhysReg)) { - VirtDead.push_back(Reg); - CopyDstReg = Register(); // cancel coalescing; - } else - CopyDstReg = (CopyDstReg == Reg || CopyDstReg == PhysReg) ? PhysReg : 0; + freePhysReg(Reg); + } } - // Kill dead defs after the scan to ensure that multiple defs of the same - // register are allocated identically. We didn't need to do this for uses - // because we are creating our own kill flags, and they are always at the last - // use. - for (Register VirtReg : VirtDead) - killVirtReg(VirtReg); - VirtDead.clear(); - LLVM_DEBUG(dbgs() << "<< " << MI); - if (CopyDstReg && CopyDstReg == CopySrcReg && CopyDstSub == CopySrcSub) { + if (MI.isCopy() && MI.getOperand(0).getReg() == MI.getOperand(1).getReg() && + MI.getNumOperands() == 2) { LLVM_DEBUG(dbgs() << "Mark identity copy for removal\n"); Coalesced.push_back(&MI); } @@ -1195,23 +1344,22 @@ if (!Register::isVirtualRegister(Reg)) return; + // Already spilled to a stackslot? + int SS = StackSlotForVirtReg[Reg]; + if (SS != -1) { + // Modify DBG_VALUE now that the value is in a spill slot. 
+ updateDbgValueForSpill(MI, SS); + LLVM_DEBUG(dbgs() << "Rewrite DBG_VALUE for spilled memory: " << MI); + return; + } + // See if this virtual register has already been allocated to a physical // register or spilled to a stack slot. LiveRegMap::iterator LRI = findLiveVirtReg(Reg); if (LRI != LiveVirtRegs.end() && LRI->PhysReg) { setPhysReg(MI, MO, LRI->PhysReg); } else { - int SS = StackSlotForVirtReg[Reg]; - if (SS != -1) { - // Modify DBG_VALUE now that the value is in a spill slot. - updateDbgValueForSpill(MI, SS); - LLVM_DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << MI); - return; - } - - // We can't allocate a physreg for a DebugValue, sorry! - LLVM_DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(Register()); + DanglingDbgValues[Reg].push_back(&MI); } // If Reg hasn't been spilled, put this DBG_VALUE in LiveDbgValueMap so @@ -1219,6 +1367,17 @@ LiveDbgValueMap[Reg].push_back(&MI); } +#ifndef NDEBUG +bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const { + for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) { + if (RegUnitStates[*UI] != LR.VirtReg) + return false; + } + + return true; +} +#endif + void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) { this->MBB = &MBB; LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); @@ -1226,18 +1385,15 @@ RegUnitStates.assign(TRI->getNumRegUnits(), regFree); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); - MachineBasicBlock::iterator MII = MBB.begin(); - - // Add live-in registers as live. - for (const MachineBasicBlock::RegisterMaskPair &LI : MBB.liveins()) - if (MRI->isAllocatable(LI.PhysReg)) - definePhysReg(MII, LI.PhysReg, regReserved); + for (MachineBasicBlock *Succ : MBB.successors()) { + for (const MachineBasicBlock::RegisterMaskPair &LI : Succ->liveins()) + setPhysRegState(LI.PhysReg, regPreAssigned); + } - VirtDead.clear(); Coalesced.clear(); - // Otherwise, sequentially allocate each instruction in the MBB. 
- for (MachineInstr &MI : MBB) { + // Traverse block in reverse order allocating instructions one by one. + for (MachineInstr &MI : reverse(MBB)) { LLVM_DEBUG( dbgs() << "\n>> " << MI << "Regs:"; dumpState() @@ -1253,9 +1409,14 @@ allocateInstruction(MI); } + LLVM_DEBUG( + dbgs() << "Begin Regs:"; + dumpState() + ); + // Spill all physical registers holding virtual registers now. - LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n"); - spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true); + LLVM_DEBUG(dbgs() << "Loading live registers at begin of block.\n"); + reloadAtBegin(MBB); // Erase all the coalesced copies. We are delaying it until now because // LiveVirtRegs might refer to the instrs. @@ -1263,6 +1424,20 @@ MBB.erase(MI); NumCoalesced += Coalesced.size(); + for (auto &UDBGPair : DanglingDbgValues) { + for (MachineInstr *DbgValue : UDBGPair.second) { + assert(DbgValue->isDebugValue() && "expected DBG_VALUE"); + MachineOperand &MO = DbgValue->getOperand(0); + // Nothing to do if the vreg was spilled in the meantime. 
+ if (!MO.isReg()) + continue; + LLVM_DEBUG(dbgs() << "Register did not survive for " << *DbgValue + << '\n'); + MO.setReg(0); + } + } + DanglingDbgValues.clear(); + LLVM_DEBUG(MBB.dump()); } @@ -1276,8 +1451,11 @@ MFI = &MF.getFrameInfo(); MRI->freezeReservedRegs(MF); RegClassInfo.runOnMachineFunction(MF); + unsigned NumRegUnits = TRI->getNumRegUnits(); UsedInInstr.clear(); - UsedInInstr.setUniverse(TRI->getNumRegUnits()); + UsedInInstr.setUniverse(NumRegUnits); + PhysRegUses.clear(); + PhysRegUses.setUniverse(NumRegUnits); // initialize the virtual->physical register map to have a 'null' // mapping for all virtual registers Index: llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/darwin-tls-call-clobber.ll @@ -31,9 +31,8 @@ ; This test checks that we don't re-use the register for the variable descriptor ; for the second ldr. 
; CHECK: adrp x[[PTR1:[0-9]+]], _t_val@TLVPPAGE -; CHECK: ldr x[[PTR1]], [x[[PTR1]], _t_val@TLVPPAGEOFF] -; CHECK: ldr x[[FPTR:[0-9]+]], [x[[PTR1]]] -; CHECK: mov x0, x[[PTR1]] +; CHECK: ldr x0, [x[[PTR1]], _t_val@TLVPPAGEOFF] +; CHECK: ldr x[[FPTR:[0-9]+]], [x0] ; CHECK: blr x[[FPTR]] define void @_Z4funcPKc(i8* %id) { Index: llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll +++ llvm/test/CodeGen/AArch64/arm64-fast-isel-br.ll @@ -94,7 +94,7 @@ store i32 %c, i32* %c.addr, align 4 store i64 %d, i64* %d.addr, align 8 %0 = load i16, i16* %b.addr, align 2 -; CHECK: tbz w8, #0, LBB4_2 +; CHECK: tbz {{w[0-9]+}}, #0, LBB4_2 %conv = trunc i16 %0 to i1 br i1 %conv, label %if.then, label %if.end Index: llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll +++ llvm/test/CodeGen/AArch64/arm64-fast-isel-call.ll @@ -79,8 +79,7 @@ define i32 @t2() { entry: ; CHECK-LABEL: t2 -; CHECK: mov [[REG1:x[0-9]+]], xzr -; CHECK: mov x0, [[REG1]] +; CHECK: mov x0, xzr ; CHECK: mov w1, #-8 ; CHECK: mov [[REG2:w[0-9]+]], #1023 ; CHECK: uxth w2, [[REG2]] Index: llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll +++ llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -4,9 +4,8 @@ define i32 @fptosi_wh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptosi_wh -; CHECK: fcvt s0, h0 -; CHECK: fcvtzs [[REG:w[0-9]+]], s0 -; CHECK: mov w0, [[REG]] +; CHECK: fcvt [[REG:s[0-9]+]], h0 +; CHECK: fcvtzs w0, [[REG]] %conv = fptosi half %a to i32 ret i32 %conv } @@ -15,9 +14,8 @@ define i32 @fptoui_swh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptoui_swh -; CHECK: fcvt s0, h0 -; CHECK: fcvtzu [[REG:w[0-9]+]], 
s0 -; CHECK: mov w0, [[REG]] +; CHECK: fcvt [[REG:s[0-9]+]], h0 +; CHECK: fcvtzu w0, [[REG]] %conv = fptoui half %a to i32 ret i32 %conv } @@ -26,8 +24,8 @@ define half @sitofp_hw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_hw_i1 -; CHECK: sbfx w8, w0, #0, #1 -; CHECK: scvtf s0, w8 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: scvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = sitofp i1 %a to half ret half %conv @@ -37,8 +35,8 @@ define half @sitofp_hw_i8(i8 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_hw_i8 -; CHECK: sxtb w8, w0 -; CHECK: scvtf s0, w8 +; CHECK: sxtb [[REG:w[0-9]+]], w0 +; CHECK: scvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = sitofp i8 %a to half ret half %conv @@ -48,8 +46,8 @@ define half @sitofp_hw_i16(i16 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_hw_i16 -; CHECK: sxth w8, w0 -; CHECK: scvtf s0, w8 +; CHECK: sxth [[REG:w[0-9]+]], w0 +; CHECK: scvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = sitofp i16 %a to half ret half %conv @@ -79,8 +77,8 @@ define half @uitofp_hw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_hw_i1 -; CHECK: and w8, w0, #0x1 -; CHECK: ucvtf s0, w8 +; CHECK: and [[REG:w[0-9]+]], w0, #0x1 +; CHECK: ucvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = uitofp i1 %a to half ret half %conv @@ -90,8 +88,8 @@ define half @uitofp_hw_i8(i8 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_hw_i8 -; CHECK: and w8, w0, #0xff -; CHECK: ucvtf s0, w8 +; CHECK: and [[REG:w[0-9]+]], w0, #0xff +; CHECK: ucvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = uitofp i8 %a to half ret half %conv @@ -101,8 +99,8 @@ define half @uitofp_hw_i16(i16 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_hw_i16 -; CHECK: and w8, w0, #0xffff -; CHECK: ucvtf s0, w8 +; CHECK: and [[REG:w[0-9]+]], w0, #0xffff +; CHECK: ucvtf s0, [[REG]] ; CHECK: fcvt h0, s0 %conv = uitofp i16 %a to half ret half %conv Index: llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll =================================================================== --- 
llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll +++ llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll @@ -1,4 +1,4 @@ -; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck %s +; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -verify-machineinstrs -mtriple=arm64-apple-darwin -mcpu=cyclone < %s | FileCheck -enable-var-scope %s ;; Test various conversions. define zeroext i32 @trunc_(i8 zeroext %a, i16 zeroext %b, i32 %c, i64 %d) nounwind ssp { @@ -49,13 +49,12 @@ ; CHECK: strh w1, [sp, #12] ; CHECK: str w2, [sp, #8] ; CHECK: str x3, [sp] -; CHECK: ldrb w8, [sp, #15] -; CHECK: strh w8, [sp, #12] -; CHECK: ldrh w8, [sp, #12] -; CHECK: str w8, [sp, #8] -; CHECK: ldr w8, [sp, #8] -; CHECK: ; kill: def $x8 killed $w8 -; CHECK: str x8, [sp] +; CHECK: ldrb [[REG0:w[0-9]+]], [sp, #15] +; CHECK: strh [[REG0]], [sp, #12] +; CHECK: ldrh [[REG1:w[0-9]+]], [sp, #12] +; CHECK: str [[REG1]], [sp, #8] +; CHECK: ldr w[[REG2:[0-9]+]], [sp, #8] +; CHECK: str x[[REG2]], [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 @@ -105,12 +104,12 @@ ; CHECK: strh w1, [sp, #12] ; CHECK: str w2, [sp, #8] ; CHECK: str x3, [sp] -; CHECK: ldrsb w8, [sp, #15] -; CHECK: strh w8, [sp, #12] -; CHECK: ldrsh w8, [sp, #12] -; CHECK: str w8, [sp, #8] -; CHECK: ldrsw x8, [sp, #8] -; CHECK: str x8, [sp] +; CHECK: ldrsb [[REG0:w[0-9]+]], [sp, #15] +; CHECK: strh [[REG0]], [sp, #12] +; CHECK: ldrsh [[REG1:w[0-9]+]], [sp, #12] +; CHECK: str [[REG1]], [sp, #8] +; CHECK: ldrsw [[REG2:x[0-9]+]], [sp, #8] +; CHECK: str [[REG2]], [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 @@ -166,8 +165,8 @@ define signext i16 @sext_i1_i16(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sext_i1_i16 -; CHECK: sbfx w8, w0, #0, #1 -; CHECK-NEXT: sxth w0, w8 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: sxth w0, [[REG]] %conv = sext i1 %a to i16 ret i16 %conv } @@ -176,8 +175,8 @@ define signext i8 
@sext_i1_i8(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sext_i1_i8 -; CHECK: sbfx w8, w0, #0, #1 -; CHECK-NEXT: sxtb w0, w8 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: sxtb w0, [[REG]] %conv = sext i1 %a to i8 ret i8 %conv } @@ -240,8 +239,8 @@ define float @sitofp_sw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_sw_i1 -; CHECK: sbfx w8, w0, #0, #1 -; CHECK: scvtf s0, w8 +; CHECK: sbfx [[REG:w[0-9]+]], w0, #0, #1 +; CHECK: scvtf s0, [[REG]] %conv = sitofp i1 %a to float ret float %conv } @@ -250,8 +249,8 @@ define float @sitofp_sw_i8(i8 %a) nounwind ssp { entry: ; CHECK-LABEL: sitofp_sw_i8 -; CHECK: sxtb w8, w0 -; CHECK: scvtf s0, w8 +; CHECK: sxtb [[REG:w[0-9]+]], w0 +; CHECK: scvtf s0, [[REG]] %conv = sitofp i8 %a to float ret float %conv } @@ -304,8 +303,8 @@ define float @uitofp_sw_i1(i1 %a) nounwind ssp { entry: ; CHECK-LABEL: uitofp_sw_i1 -; CHECK: and w8, w0, #0x1 -; CHECK: ucvtf s0, w8 +; CHECK: and [[REG:w[0-9]+]], w0, #0x1 +; CHECK: ucvtf s0, [[REG]] %conv = uitofp i1 %a to float ret float %conv } @@ -374,7 +373,8 @@ define zeroext i16 @i64_trunc_i16(i64 %a) nounwind ssp { entry: ; CHECK-LABEL: i64_trunc_i16 -; CHECK: and [[REG2:w[0-9]+]], w0, #0xffff +; CHECK: mov x[[TMP:[0-9]+]], x0 +; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0xffff{{$}} ; CHECK: uxth w0, [[REG2]] %conv = trunc i64 %a to i16 ret i16 %conv @@ -383,7 +383,8 @@ define zeroext i8 @i64_trunc_i8(i64 %a) nounwind ssp { entry: ; CHECK-LABEL: i64_trunc_i8 -; CHECK: and [[REG2:w[0-9]+]], w0, #0xff +; CHECK: mov x[[TMP:[0-9]+]], x0 +; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0xff{{$}} ; CHECK: uxtb w0, [[REG2]] %conv = trunc i64 %a to i8 ret i8 %conv @@ -392,7 +393,8 @@ define zeroext i1 @i64_trunc_i1(i64 %a) nounwind ssp { entry: ; CHECK-LABEL: i64_trunc_i1 -; CHECK: and [[REG2:w[0-9]+]], w0, #0x1 +; CHECK: mov x[[TMP:[0-9]+]], x0 +; CHECK: and [[REG2:w[0-9]+]], w[[TMP]], #0x1{{$}} ; CHECK: and w0, [[REG2]], #0x1 %conv = trunc i64 %a to i1 ret i1 %conv Index: 
llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -210,10 +210,10 @@ ; ; FAST-LABEL: test_vcvt_high_f32_f64: ; FAST: // %bb.0: -; FAST-NEXT: // implicit-def: $q2 ; FAST-NEXT: mov.16b v2, v0 -; FAST-NEXT: fcvtn2 v2.4s, v1.2d +; FAST-NEXT: // implicit-def: $q0 ; FAST-NEXT: mov.16b v0, v2 +; FAST-NEXT: fcvtn2 v0.4s, v1.2d ; FAST-NEXT: ret ; ; GISEL-LABEL: test_vcvt_high_f32_f64: @@ -249,10 +249,10 @@ ; ; FAST-LABEL: test_vcvtx_high_f32_f64: ; FAST: // %bb.0: -; FAST-NEXT: // implicit-def: $q2 ; FAST-NEXT: mov.16b v2, v0 -; FAST-NEXT: fcvtxn2 v2.4s, v1.2d +; FAST-NEXT: // implicit-def: $q0 ; FAST-NEXT: mov.16b v0, v2 +; FAST-NEXT: fcvtxn2 v0.4s, v1.2d ; FAST-NEXT: ret ; ; GISEL-LABEL: test_vcvtx_high_f32_f64: @@ -283,17 +283,12 @@ ; ; FAST-LABEL: to_half: ; FAST: // %bb.0: -; FAST-NEXT: sub sp, sp, #16 // =16 -; FAST-NEXT: .cfi_def_cfa_offset 16 -; FAST-NEXT: fcvt h0, s0 +; FAST-NEXT: fcvt h1, s0 ; FAST-NEXT: // implicit-def: $w0 -; FAST-NEXT: fmov s1, w0 -; FAST-NEXT: mov.16b v1, v0 -; FAST-NEXT: fmov w8, s1 -; FAST-NEXT: mov w0, w8 -; FAST-NEXT: str w0, [sp, #12] // 4-byte Folded Spill -; FAST-NEXT: mov w0, w8 -; FAST-NEXT: add sp, sp, #16 // =16 +; FAST-NEXT: fmov s0, w0 +; FAST-NEXT: mov.16b v0, v1 +; FAST-NEXT: fmov w0, s0 +; FAST-NEXT: // kill: def $w1 killed $w0 ; FAST-NEXT: ret ; ; GISEL-LABEL: to_half: Index: llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll +++ llvm/test/CodeGen/AArch64/arm64_32-fastisel.ll @@ -17,8 +17,9 @@ define void @test_struct_return(i32* %addr) { ; CHECK-LABEL: test_struct_return: ; CHECK: bl _callee -; CHECK-DAG: lsr [[HI:x[0-9]+]], x0, #32 -; CHECK-DAG: str w0 +; CHECK: x[[COPYX0:[0-9]+]], x0 +; CHECK-DAG: lsr [[HI:x[0-9]+]], x[[COPYX0]], #32 +; CHECK-DAG: str 
w[[COPYX0]] %res = call [2 x i32] @callee() %res.0 = extractvalue [2 x i32] %res, 0 store i32 %res.0, i32* %addr Index: llvm/test/CodeGen/AArch64/arm64_32-null.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64_32-null.ll +++ llvm/test/CodeGen/AArch64/arm64_32-null.ll @@ -13,11 +13,12 @@ define void @test_phi(i8** %p) { ; CHECK-LABEL: test_phi: ; CHECK: mov [[R1:x[0-9]+]], xzr -; CHECK: str [[R1]], [sp] +; CHECK: str [[R1]], [sp, #8] ; CHECK: b [[BB:LBB[0-9_]+]] ; CHECK: [[BB]]: -; CHECK: ldr x0, [sp] -; CHECK: str w0, [x{{.*}}] +; CHECK: ldr x0, [sp, #8] +; CHECK: mov w8, w0 +; CHECK: str w8, [x{{.*}}] bb0: br label %bb1 Index: llvm/test/CodeGen/AArch64/br-cond-not-merge.ll =================================================================== --- llvm/test/CodeGen/AArch64/br-cond-not-merge.ll +++ llvm/test/CodeGen/AArch64/br-cond-not-merge.ll @@ -64,9 +64,9 @@ ; OPT: b.gt [[L:\.LBB[0-9_]+]] ; OPT: tbz w1, #0, [[L]] ; +; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]] ; NOOPT: subs w{{[0-9]+}}, w{{[0-9]+}}, #0 ; NOOPT: cset [[R1:w[0-9]+]], gt -; NOOPT: str w1, [sp, #[[SLOT2:[0-9]+]]] ; NOOPT: str [[R1]], [sp, #[[SLOT1:[0-9]+]]] ; NOOPT: b .LBB ; NOOPT: ldr [[R2:w[0-9]+]], [sp, #[[SLOT1]]] Index: llvm/test/CodeGen/AArch64/cmpxchg-O0.ll =================================================================== --- llvm/test/CodeGen/AArch64/cmpxchg-O0.ll +++ llvm/test/CodeGen/AArch64/cmpxchg-O0.ll @@ -1,16 +1,16 @@ -; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=aarch64-linux-gnu -O0 -fast-isel=0 -global-isel=false %s -o - | FileCheck -enable-var-scope %s define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_8: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxrb [[OLD:w[0-9]+]], [x0] +; CHECK: 
ldaxrb [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], w1, uxtb ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxrb [[STATUS]], w2, [x0] +; CHECK: stlxrb [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: -; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 +; CHECK: subs {{w[0-9]+}}, [[OLD]], w1, uxtb ; CHECK: cset {{w[0-9]+}}, eq %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic ret { i8, i1 } %res @@ -18,12 +18,12 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_16: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxrh [[OLD:w[0-9]+]], [x0] +; CHECK: ldaxrh [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], w1, uxth ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxrh [[STATUS:w[3-9]]], w2, [x0] +; CHECK: stlxrh [[STATUS:w[3-9]]], w2, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 @@ -34,12 +34,12 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_32: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxr [[OLD:w[0-9]+]], [x0] +; CHECK: ldaxr [[OLD:w[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], w1 ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxr [[STATUS]], w2, [x0] +; CHECK: stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{w[0-9]+}}, [[OLD]], w1 @@ -50,12 +50,12 @@ define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_64: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: mov [[STATUS:w[3-9]+]], #0 -; CHECK: ldaxr [[OLD:x[0-9]+]], [x0] +; CHECK: ldaxr [[OLD:x[0-9]+]], 
{{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], x1 ; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxr [[STATUS]], x2, [x0] +; CHECK: stlxr [[STATUS:w[0-9]+]], x2, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: ; CHECK: subs {{x[0-9]+}}, [[OLD]], x1 @@ -66,14 +66,15 @@ define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_128: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0] +; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD_LO]], x2 ; CHECK: cset [[CMP_TMP:w[0-9]+]], ne ; CHECK: cmp [[OLD_HI]], x3 ; CHECK: cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne ; CHECK: cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, [x0] +; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz [[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: %res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst monotonic @@ -86,17 +87,18 @@ @var128 = global i128 0 define {i128, i1} @test_cmpxchg_128_unsplit(i128* %addr) { ; CHECK-LABEL: test_cmpxchg_128_unsplit: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: add x[[VAR128:[0-9]+]], {{x[0-9]+}}, :lo12:var128 ; CHECK: ldp [[DESIRED_LO:x[0-9]+]], [[DESIRED_HI:x[0-9]+]], [x[[VAR128]]] ; CHECK: ldp [[NEW_LO:x[0-9]+]], [[NEW_HI:x[0-9]+]], [x[[VAR128]]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0] +; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD_LO]], [[DESIRED_LO]] ; CHECK: cset [[CMP_TMP:w[0-9]+]], ne ; CHECK: cmp [[OLD_HI]], [[DESIRED_HI]] ; CHECK: cinc [[CMP:w[0-9]+]], [[CMP_TMP]], ne ; CHECK: cbnz [[CMP]], [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], [x0] +; CHECK: stlxp [[STATUS:w[0-9]+]], [[NEW_LO]], [[NEW_HI]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cbnz 
[[STATUS]], [[RETRY]] ; CHECK: [[DONE]]: Index: llvm/test/CodeGen/AArch64/combine-loads.ll =================================================================== --- llvm/test/CodeGen/AArch64/combine-loads.ll +++ llvm/test/CodeGen/AArch64/combine-loads.ll @@ -6,10 +6,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: ldr x8, [x0] -; CHECK-NEXT: ldr x9, [x0, #8] -; CHECK-NEXT: mov v0.d[0], x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: ldr x8, [x0, #8] +; CHECK-NEXT: mov v0.d[0], x9 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %b = load i64, i64* %p %p2 = getelementptr i64, i64* %p, i64 1 Index: llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll =================================================================== --- llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll +++ llvm/test/CodeGen/AArch64/fast-isel-cmpxchg.ll @@ -1,20 +1,19 @@ ; RUN: llc -mtriple=aarch64-- -O0 -fast-isel -fast-isel-abort=4 -verify-machineinstrs < %s | FileCheck %s ; CHECK-LABEL: cmpxchg_monotonic_32: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9_]+]]: -; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 -; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] -; CHECK-NEXT: cmp [[OLD]], w1 +; CHECK-NEXT: ldaxr w0, {{\[}}[[ADDR]]{{\]}} +; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: stlxr [[STATUS]], w2, [x0] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], w2, {{\[}}[[ADDR]]{{\]}} ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: -; CHECK-NEXT: cmp [[OLD]], w1 -; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: cset [[STATUS]], eq ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1 ; CHECK-NEXT: str [[STATUS32]], [x3] -; CHECK-NEXT: mov w0, [[OLD]] define i32 @cmpxchg_monotonic_32(i32* %p, i32 %cmp, i32 %new, i32* %ps) #0 { %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new monotonic monotonic %tmp1 = extractvalue { i32, i1 
} %tmp0, 0 @@ -26,21 +25,20 @@ ; CHECK-LABEL: cmpxchg_acq_rel_32_load: ; CHECK: // %bb.0: -; CHECK: ldr [[NEW:w[0-9]+]], [x2] +; CHECK: mov [[ADDR:x[0-9]+]], x0 +; CHECK: ldr [[NEW:w[0-9]+]], [x2] ; CHECK-NEXT: [[RETRY:.LBB[0-9_]+]]: -; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 -; CHECK-NEXT: ldaxr [[OLD:w[0-9]+]], [x0] -; CHECK-NEXT: cmp [[OLD]], w1 +; CHECK-NEXT: ldaxr w0, {{\[}}[[ADDR]]{{\]}} +; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: stlxr [[STATUS]], [[NEW]], [x0] +; CHECK-NEXT: stlxr [[STATUS:w[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: -; CHECK-NEXT: cmp [[OLD]], w1 -; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: cset [[STATUS]], eq ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1 ; CHECK-NEXT: str [[STATUS32]], [x3] -; CHECK-NEXT: mov w0, [[OLD]] define i32 @cmpxchg_acq_rel_32_load(i32* %p, i32 %cmp, i32* %pnew, i32* %ps) #0 { %new = load i32, i32* %pnew %tmp0 = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel acquire @@ -52,20 +50,19 @@ } ; CHECK-LABEL: cmpxchg_seq_cst_64: +; CHECK: mov [[ADDR:x[0-9]+]], x0 ; CHECK: [[RETRY:.LBB[0-9_]+]]: -; CHECK-NEXT: mov [[STATUS:w[0-9]+]], #0 -; CHECK-NEXT: ldaxr [[OLD:x[0-9]+]], [x0] -; CHECK-NEXT: cmp [[OLD]], x1 +; CHECK-NEXT: ldaxr x0, {{\[}}[[ADDR]]{{\]}} +; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: b.ne [[DONE:.LBB[0-9_]+]] ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: stlxr [[STATUS]], x2, [x0] +; CHECK-NEXT: stlxr [[STATUS]], x2, {{\[}}[[ADDR]]{{\]}} ; CHECK-NEXT: cbnz [[STATUS]], [[RETRY]] ; CHECK-NEXT: [[DONE]]: -; CHECK-NEXT: cmp [[OLD]], x1 +; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: cset [[STATUS:w[0-9]+]], eq ; CHECK-NEXT: and [[STATUS32:w[0-9]+]], [[STATUS]], #0x1 ; CHECK-NEXT: str [[STATUS32]], [x3] -; CHECK-NEXT: mov x0, [[OLD]] define i64 @cmpxchg_seq_cst_64(i64* %p, i64 %cmp, i64 %new, i32* %ps) #0 { %tmp0 = cmpxchg i64* %p, i64 %cmp, i64 %new seq_cst seq_cst 
%tmp1 = extractvalue { i64, i1 } %tmp0, 0 Index: llvm/test/CodeGen/AArch64/popcount.ll =================================================================== --- llvm/test/CodeGen/AArch64/popcount.ll +++ llvm/test/CodeGen/AArch64/popcount.ll @@ -6,15 +6,15 @@ ; CHECK-LABEL: popcount128: ; CHECK: // %bb.0: // %Entry ; CHECK-NEXT: ldr x8, [x0, #8] -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: uaddlv h1, v0.16b +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret Entry: %1 = load i128, i128* %0, align 16 @@ -32,24 +32,24 @@ ; CHECK: // %bb.0: // %Entry ; CHECK-NEXT: ldr x8, [x0, #8] ; CHECK-NEXT: ldr x9, [x0, #24] -; CHECK-NEXT: ldr d0, [x0, #16] -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: mov v1.d[1], x9 -; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: cnt v0.16b, v1.16b -; CHECK-NEXT: uaddlv h0, v0.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: ldr d1, [x0, #16] +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: uaddlv h1, v0.16b +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov w9, s0 +; 
CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: uaddlv h1, v0.16b +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w9 ; CHECK-NEXT: ret Entry: @@ -69,10 +69,10 @@ ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: cnt v0.16b, v0.16b -; CHECK-NEXT: uaddlv h0, v0.16b -; CHECK-NEXT: // implicit-def: $q1 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: uaddlv h1, v0.16b +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: // kill: def $x0 killed $w0 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x1, v0.d[1] Index: llvm/test/CodeGen/AArch64/swift-return.ll =================================================================== --- llvm/test/CodeGen/AArch64/swift-return.ll +++ llvm/test/CodeGen/AArch64/swift-return.ll @@ -203,10 +203,10 @@ ; CHECK-DAG: mov w3, w0 ; CHECK: ret ; CHECK-O0-LABEL: _gen7 -; CHECK-O0: str w0, [sp, #12] -; CHECK-O0: ldr w1, [sp, #12] -; CHECK-O0: ldr w2, [sp, #12] -; CHECK-O0: ldr w3, [sp, #12] +; CHECK-O0: mov w3, w0 +; CHECK-O0: mov w0, w3 +; CHECK-O0: mov w1, w3 +; CHECK-O0: mov w2, w3 define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) { %v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0 %v1 = insertvalue { i32, i32, i32, i32 } %v0, i32 %key, 1 @@ -221,10 +221,10 @@ ; CHECK: mov w3, w0 ; CHECK: ret ; CHECK-O0-LABEL: _gen9 -; CHECK-O0: str w0, [sp, #12] -; CHECK-O0: ldr w1, [sp, #12] -; CHECK-O0: ldr w2, [sp, #12] -; CHECK-O0: ldr w3, [sp, #12] +; CHECK-O0: mov w3, w0 +; CHECK-O0: mov w0, w3 +; CHECK-O0: mov w1, w3 +; CHECK-O0: mov w2, w3 define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) { %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0 %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1 Index: 
llvm/test/CodeGen/AArch64/swifterror.ll =================================================================== --- llvm/test/CodeGen/AArch64/swifterror.ll +++ llvm/test/CodeGen/AArch64/swifterror.ll @@ -21,11 +21,10 @@ ; CHECK-O0-LABEL: foo: ; CHECK-O0: mov w{{.*}}, #16 ; CHECK-O0: malloc -; CHECK-O0: mov x1, x0 -; CHECK-O0-NOT: x1 +; CHECK-O0: mov x21, x0 +; CHECK-O0-NOT: x21 ; CHECK-O0: mov [[ID:w[0-9]+]], #1 ; CHECK-O0: strb [[ID]], [x0, #8] -; CHECK-O0: mov x21, x1 entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -138,14 +137,12 @@ ; CHECK-O0: cbz w0 ; CHECK-O0: mov w{{.*}}, #16 ; CHECK-O0: malloc -; CHECK-O0: mov [[ID:x[0-9]+]], x0 +; CHECK-O0: mov x21, x0 ; CHECK-O0: mov [[ID2:w[0-9]+]], #1 ; CHECK-O0: strb [[ID2]], [x0, #8] -; CHECK-O0: mov x21, [[ID]] ; CHECK-O0: ret ; reload from stack -; CHECK-O0: ldr [[ID3:x[0-9]+]], [sp, [[SLOT]]] -; CHECK-O0: mov x21, [[ID3]] +; CHECK-O0: ldr x21, [sp, [[SLOT]]] ; CHECK-O0: ret entry: %cond = icmp ne i32 %cc, 0 @@ -179,10 +176,10 @@ ; CHECK-O0-AARCH64-LABEL: foo_loop: ; spill x21 -; CHECK-O0-AARCH64: str x21, [sp, [[SLOT:#[0-9]+]]] +; CHECK-O0-AARCH64: stur x21, [x29, [[SLOT:#-[0-9]+]]] ; CHECK-O0-AARCH64: b [[BB1:[A-Za-z0-9_]*]] ; CHECK-O0-AARCH64: [[BB1]]: -; CHECK-O0-AARCH64: ldr x0, [sp, [[SLOT]]] +; CHECK-O0-AARCH64: ldur x0, [x29, [[SLOT]]] ; CHECK-O0-AARCH64: str x0, [sp, [[SLOT2:#[0-9]+]]] ; CHECK-O0-AARCH64: cbz {{.*}}, [[BB2:[A-Za-z0-9_]*]] ; CHECK-O0-AARCH64: mov w{{.*}}, #16 @@ -194,11 +191,10 @@ ; CHECK-O0-AARCH64:[[BB2]]: ; CHECK-O0-AARCH64: ldr x0, [sp, [[SLOT2]]] ; CHECK-O0-AARCH64: fcmp -; CHECK-O0-AARCH64: str x0, [sp] +; CHECK-O0-AARCH64: stur x0, [x29, [[SLOT]]] ; CHECK-O0-AARCH64: b.le [[BB1]] ; reload from stack -; CHECK-O0-AARCH64: ldr [[ID3:x[0-9]+]], [sp] -; CHECK-O0-AARCH64: mov x21, [[ID3]] +; CHECK-O0-AARCH64: ldr x21, [sp] ; CHECK-O0-AARCH64: ret ; CHECK-O0-ARM64_32-LABEL: foo_loop: @@ -215,14 +211,12 @@ ; CHECK-O0-ARM64_32: strb w{{.*}}, ; 
CHECK-O0-ARM64_32:[[BB2]]: ; CHECK-O0-ARM64_32: ldr x0, [sp, [[SLOT2]]] -; CHECK-O0-ARM64_32: fcmp ; CHECK-O0-ARM64_32: str x0, [sp[[OFFSET:.*]]] +; CHECK-O0-ARM64_32: fcmp ; CHECK-O0-ARM64_32: b.le [[BB1]] ; reload from stack -; CHECK-O0-ARM64_32: ldr [[ID3:x[0-9]+]], [sp[[OFFSET]]] -; CHECK-O0-ARM64_32: mov x21, [[ID3]] +; CHECK-O0-ARM64_32: ldr x21, [sp[[OFFSET]]] ; CHECK-O0-ARM64_32: ret - entry: br label %bb_loop @@ -261,16 +255,16 @@ ; CHECK-APPLE-NOT: x21 ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: mov w{{.*}}, #16 ; spill x8 ; CHECK-O0-DAG: str x8 +; CHECK-O0: mov w{{.*}}, #16 ; CHECK-O0: malloc +; CHECK-O0: mov x10, x0 +; CHECK-O0: mov x21, x10 ; CHECK-O0: mov [[ID:w[0-9]+]], #1 -; CHECK-O0: strb [[ID]], [x0, #8] +; CHECK-O0: strb [[ID]], [x10, #8] ; reload from stack -; CHECK-O0: ldr [[SRET:x[0-9]+]] -; CHECK-O0: str w{{.*}}, [{{.*}}[[SRET]], #4] -; CHECK-O0: mov x21 +; CHECK-O0: str w{{.*}}, [x8, #4] ; CHECK-O0-NOT: x21 entry: %call = call i8* @malloc(i64 16) @@ -299,7 +293,7 @@ ; CHECK-O0-LABEL: caller3: ; spill x0 -; CHECK-O0: str x0 +; CHECK-O0: str x0, [sp, [[OFFSET:#[0-9]+]]] ; CHECK-O0: mov x21 ; CHECK-O0: bl {{.*}}foo_sret ; CHECK-O0: mov [[ID2:x[0-9]+]], x21 @@ -307,8 +301,8 @@ ; CHECK-O0-ARM64_32: cmp x21, #0 ; Access part of the error object and save it to error_ref ; reload from stack +; CHECK-O0: ldr [[ID:x[0-9]+]], [sp, [[OFFSET]]] ; CHECK-O0: ldrb [[CODE:w[0-9]+]] -; CHECK-O0: ldr [[ID:x[0-9]+]] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] ; CHECK-O0: bl {{.*}}free entry: @@ -630,11 +624,10 @@ ; Make sure we properly assign registers during fast-isel. 
; CHECK-O0-LABEL: testAssign -; CHECK-O0: mov [[TMP:x.*]], xzr -; CHECK-O0: mov x21, [[TMP]] +; CHECK-O0: mov x21, xzr ; CHECK-O0: bl _foo2 ; CHECK-O0: str x21, [s[[STK:.*]]] -; CHECK-O0: ldr x0, [s[[STK]]] +; CHECK-O0: ldr x{{[0-9]+}}, [s[[STK]]] ; CHECK-APPLE-LABEL: testAssign ; CHECK-APPLE: mov x21, xzr Index: llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir =================================================================== --- llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir +++ llvm/test/CodeGen/AArch64/unwind-preserved-from-mir.mir @@ -81,14 +81,14 @@ ; CHECK: frame-setup CFI_INSTRUCTION offset $b21, -240 ; CHECK: frame-setup CFI_INSTRUCTION offset $b22, -256 ; CHECK: frame-setup CFI_INSTRUCTION offset $b23, -272 + ; CHECK: STRQui $q0, $sp, 0 :: (store 16 into %stack.1) ; CHECK: EH_LABEL - ; CHECK: STRQui $q0, $sp, 1 :: (store 16 into %stack.0) - ; CHECK: BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def $lr, implicit $sp, implicit killed $q0, implicit-def $q0 + ; CHECK: BL @may_throw_neon, csr_aarch64_aavpcs, implicit-def dead $lr, implicit $sp, implicit killed $q0, implicit-def $q0 + ; CHECK: STRQui killed $q0, $sp, 1 :: (store 16 into %stack.0) ; CHECK: EH_LABEL - ; CHECK: STRQui killed $q0, $sp, 0 :: (store 16 into %stack.1) ; CHECK: B %bb.1 ; CHECK: bb.1..Lcontinue: - ; CHECK: $q0 = LDRQui $sp, 0 :: (load 16 from %stack.1) + ; CHECK: $q0 = LDRQui $sp, 1 :: (load 16 from %stack.0) ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 :: (load 8 from %stack.3), (load 8 from %stack.2) ; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4) ; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6) @@ -103,7 +103,7 @@ ; CHECK: bb.2..Lunwind (landing-pad): ; CHECK: liveins: $x0, $x1 ; CHECK: EH_LABEL - ; CHECK: $q0 = LDRQui $sp, 1 :: (load 16 from %stack.0) + ; CHECK: $q0 = LDRQui $sp, 0 :: (load 16 from %stack.1) ; CHECK: $fp, $lr = frame-destroy LDPXi $sp, 36 
:: (load 8 from %stack.3), (load 8 from %stack.2) ; CHECK: $q9, $q8 = frame-destroy LDPQi $sp, 16 :: (load 16 from %stack.5), (load 16 from %stack.4) ; CHECK: $q11, $q10 = frame-destroy LDPQi $sp, 14 :: (load 16 from %stack.7), (load 16 from %stack.6) Index: llvm/test/CodeGen/AArch64/unwind-preserved.ll =================================================================== --- llvm/test/CodeGen/AArch64/unwind-preserved.ll +++ llvm/test/CodeGen/AArch64/unwind-preserved.ll @@ -50,14 +50,14 @@ ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: .Ltmp0: -; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: bl may_throw_sve ; CHECK-NEXT: .Ltmp1: -; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_1: // %.Lcontinue -; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload @@ -92,7 +92,7 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: // %.Lunwind ; CHECK-NEXT: .Ltmp2: -; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload @@ -172,14 +172,14 @@ ; GISEL-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; GISEL-NEXT: .cfi_offset w30, -8 ; GISEL-NEXT: .cfi_offset w29, -16 +; GISEL-NEXT: str z0, [sp] // 16-byte Folded Spill ; GISEL-NEXT: 
.Ltmp0: -; GISEL-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill ; GISEL-NEXT: bl may_throw_sve ; GISEL-NEXT: .Ltmp1: -; GISEL-NEXT: str z0, [sp] // 16-byte Folded Spill +; GISEL-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill ; GISEL-NEXT: b .LBB0_1 ; GISEL-NEXT: .LBB0_1: // %.Lcontinue -; GISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload +; GISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #2 ; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload @@ -214,7 +214,7 @@ ; GISEL-NEXT: ret ; GISEL-NEXT: .LBB0_2: // %.Lunwind ; GISEL-NEXT: .Ltmp2: -; GISEL-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload +; GISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #2 ; GISEL-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; GISEL-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload @@ -293,14 +293,14 @@ ; CHECK-NEXT: .cfi_offset b21, -240 ; CHECK-NEXT: .cfi_offset b22, -256 ; CHECK-NEXT: .cfi_offset b23, -272 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: .Ltmp3: -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl may_throw_neon ; CHECK-NEXT: .Ltmp4: -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: b .LBB1_1 ; CHECK-NEXT: .LBB1_1: // %.Lcontinue -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload ; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload ; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload @@ -314,7 +314,7 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_2: // %.Lunwind ; CHECK-NEXT: .Ltmp5: -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte 
Folded Reload ; CHECK-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload ; CHECK-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload @@ -360,13 +360,13 @@ ; GISEL-NEXT: .cfi_offset b21, -240 ; GISEL-NEXT: .cfi_offset b22, -256 ; GISEL-NEXT: .cfi_offset b23, -272 +; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill ; GISEL-NEXT: .Ltmp3: -; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; GISEL-NEXT: bl may_throw_neon +; GISEL-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; GISEL-NEXT: .Ltmp4: -; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill ; GISEL-NEXT: // %bb.1: // %.Lcontinue -; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload ; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload ; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload @@ -380,7 +380,7 @@ ; GISEL-NEXT: ret ; GISEL-NEXT: .LBB1_2: // %.Lunwind ; GISEL-NEXT: .Ltmp5: -; GISEL-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload ; GISEL-NEXT: ldp q9, q8, [sp, #256] // 32-byte Folded Reload ; GISEL-NEXT: ldp q11, q10, [sp, #224] // 32-byte Folded Reload Index: llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/inline-asm.ll @@ -28,15 +28,15 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: s_mov_b32 s4, 7 +; CHECK-NEXT: s_mov_b32 s5, 7 ; CHECK-NEXT: ;;#ASMEND ; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: s_mov_b32 s5, 8 +; CHECK-NEXT: s_mov_b32 s4, 8 ; CHECK-NEXT: ;;#ASMEND ; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: s_add_u32 s5, s4, s5 +; CHECK-NEXT: s_add_u32 s4, s5, s4 ; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: 
v_mov_b32_e32 v0, s5 +; CHECK-NEXT: v_mov_b32_e32 v0, s4 ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %asm0 = tail call i32 asm "s_mov_b32 $0, 7", "=s"() nounwind Index: llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll +++ llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll @@ -17,29 +17,28 @@ ; GCN: s_mov_b32 m0, -1 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] -; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0 -; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec -; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] - ; Spill load ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0 ; Spill saved exec +; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] ; VMEM: v_writelane_b32 v[[V_SAVEEXEC:[0-9]+]], s[[SAVEEXEC_LO]], 0 ; VMEM: v_writelane_b32 v[[V_SAVEEXEC]], s[[SAVEEXEC_HI]], 1 -; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:20 ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]] ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} ; GCN: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: ; %bb.{{[0-9]+}}: ; %if +; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: s_mov_b32 m0, -1 ; 
GCN: ds_read_b32 [[LOAD1:v[0-9]+]] -; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload ; GCN: s_waitcnt vmcnt(0) lgkmcnt(0) @@ -53,9 +52,7 @@ ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] - - -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:20 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET]] ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0 ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1 @@ -88,29 +85,26 @@ ; VGPR: workitem_private_segment_byte_size = 16{{$}} ; GCN: {{^}}; %bb.0: - -; GCN: s_mov_b32 m0, -1 -; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] - -; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0 - -; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec -; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] +; GCN-DAG: s_mov_b32 m0, -1 +; GCN-DAG: v_mov_b32_e32 [[PTR0:v[0-9]+]], 0{{$}} +; GCN: ds_read_b32 [[LOAD0:v[0-9]+]], [[PTR0]] +; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{[0-9]+}}, v0 ; Spill load ; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill +; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec ; Spill saved exec ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] - ; VMEM: v_writelane_b32 v[[V_SAVEEXEC:[0-9]+]], s[[SAVEEXEC_LO]], 0 ; VMEM: v_writelane_b32 v[[V_SAVEEXEC]], 
s[[SAVEEXEC_HI]], 1 -; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:24 ; 4-byte Folded Spill +; VMEM: buffer_store_dword v[[V_SAVEEXEC]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET:[0-9]+]] ; 4-byte Folded Spill -; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} +; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] +; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}} ; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]] @@ -127,7 +121,7 @@ ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] -; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:24 ; 4-byte Folded Reload +; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[V_EXEC_SPILL_OFFSET]] ; 4-byte Folded Reload ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 0 ; VMEM: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC]], 1 @@ -139,7 +133,7 @@ define amdgpu_kernel void @divergent_loop(i32 addrspace(1)* %out) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %load0 = load volatile i32, i32 addrspace(3)* undef + %load0 = load volatile i32, i32 addrspace(3)* null %cmp0 = icmp eq i32 %tid, 0 br i1 %cmp0, label %loop, label %end @@ -161,8 +155,12 @@ ; GCN-LABEL: {{^}}divergent_if_else_endif: ; GCN: {{^}}; %bb.0: -; GCN: s_mov_b32 m0, -1 -; GCN: ds_read_b32 [[LOAD0:v[0-9]+]] +; GCN-DAG: s_mov_b32 m0, -1 +; GCN-DAG: v_mov_b32_e32 [[PTR0:v[0-9]+]], 0{{$}} +; GCN: ds_read_b32 [[LOAD0:v[0-9]+]], [[PTR0]] + +; Spill load +; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill ; GCN: s_mov_b32 [[ZERO:s[0-9]+]], 0 ; GCN: v_cmp_ne_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], 
[[ZERO]], v0 @@ -171,9 +169,6 @@ ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]] ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}} -; Spill load -; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], 0 offset:[[LOAD0_OFFSET:[0-9]+]] ; 4-byte Folded Spill - ; Spill saved exec ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]] ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]] @@ -192,7 +187,6 @@ ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]] ; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]] - ; VMEM: buffer_load_dword v[[FLOW_V_RELOAD_SAVEEXEC:[0-9]+]], off, s[0:3], 0 offset:[[SAVEEXEC_OFFSET]] ; VMEM: s_waitcnt vmcnt(0) ; VMEM: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[FLOW_V_RELOAD_SAVEEXEC]], 0 @@ -219,8 +213,8 @@ ; GCN: ; %bb.{{[0-9]+}}: ; %if -; GCN: ds_read_b32 ; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload +; GCN: ds_read_b32 ; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]] ; GCN: buffer_store_dword [[ADD]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill ; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]] @@ -248,7 +242,7 @@ define amdgpu_kernel void @divergent_if_else_endif(i32 addrspace(1)* %out) #0 { entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() - %load0 = load volatile i32, i32 addrspace(3)* undef + %load0 = load volatile i32, i32 addrspace(3)* null %cmp0 = icmp eq i32 %tid, 0 br i1 %cmp0, label %if, label %else Index: llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir =================================================================== --- /dev/null +++ 
llvm/test/CodeGen/AMDGPU/fast-ra-kills-vcc.mir @@ -0,0 +1,62 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck %s + +# Make sure incorrect kills aren't emitted on vcc + +--- +name: foo +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: foo + ; CHECK: liveins: $vgpr0 + ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec + ; CHECK: $sgpr4_sgpr5 = COPY $vcc + ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $vcc, implicit $exec + ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed $sgpr4_sgpr5 + %0:vgpr_32 = COPY $vgpr0 + V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec + $sgpr4_sgpr5 = COPY $vcc + %1:sreg_64_xexec = COPY $vcc + %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0, implicit $vgpr0, implicit $sgpr4_sgpr5 + +... + +# This would hit "Unexpected reg unit state" assert. 
+--- +name: bar +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: bar + ; CHECK: liveins: $vgpr0 + ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec + ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc + ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) + ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc + ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5) + ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec + ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc + %0:vgpr_32 = COPY $vgpr0 + V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec + %3:sreg_64_xexec = COPY $vcc + %1:sreg_64_xexec = COPY $vcc + %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0, implicit $vgpr0, implicit %3 + +... Index: llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/fastregalloc-illegal-subreg-physreg.mir @@ -0,0 +1,27 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s + +# This would hit "Illegal subregister index for physical register" verifier error since +# tied operands would skip dropping the subregister index. 
+ +--- +name: invalid_subreg_index +tracksRegLiveness: true +machineFunctionInfo: + isEntryFunction: true +body: | + bb.0: + liveins: $vgpr0, $sgpr0 + + ; CHECK-LABEL: name: invalid_subreg_index + ; CHECK: liveins: $vgpr0, $sgpr0 + ; CHECK: $m0 = COPY renamable $sgpr0 + ; CHECK: undef renamable $vgpr1 = V_INTERP_P2_F32 undef $vgpr1, undef $vgpr0, 0, 1, implicit $mode, implicit $m0, implicit $exec, implicit-def dead $vgpr0_vgpr1 + ; CHECK: S_ENDPGM 0, implicit killed renamable $sgpr0 + %0:vgpr_32 = COPY $vgpr0 + %1:sgpr_32 = COPY $sgpr0 + $m0 = COPY %1 + undef %2.sub1:vreg_64 = V_INTERP_P2_F32 undef %2.sub1, undef %0:vgpr_32, 0, 1, implicit $mode, implicit $m0, implicit $exec + S_ENDPGM 0, implicit %1 + +... Index: llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir +++ llvm/test/CodeGen/AMDGPU/fastregalloc-self-loop-heuristic.mir @@ -18,7 +18,7 @@ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec - ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: ; GCN: S_ENDPGM 0 @@ -53,9 +53,10 @@ ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec - ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, killed renamable 
$vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec ; GCN: renamable $vgpr2 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec - ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, killed renamable $vgpr2, 0, 0, 0, 0, implicit $exec + ; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: ; GCN: S_ENDPGM 0 @@ -92,9 +93,10 @@ ; GCN: SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.0, align 4, addrspace 5) ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; GCN: renamable $vgpr0 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec - ; GCN: $vgpr1_vgpr2 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) - ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr1_vgpr2, killed renamable $vgpr0, 0, 0, 0, 0, implicit $exec + ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: renamable $vgpr2 = V_ADD_U32_e32 1, undef $vgpr0, implicit $exec + ; GCN: SI_SPILL_V32_SAVE $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, renamable $vgpr2, 0, 0, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: ; GCN: S_ENDPGM 0 @@ -128,9 +130,9 @@ ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, 
align 4, addrspace 5) - ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec - ; GCN: renamable $vgpr2 = V_ADD_U32_e64 1, 1, 0, implicit $exec - ; GCN: SI_SPILL_V32_SAVE killed $vgpr2, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr0, 0, 0, 0, 0, implicit $exec + ; GCN: renamable $vgpr0 = V_ADD_U32_e64 1, 1, 0, implicit $exec + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: ; GCN: S_ENDPGM 0 @@ -164,9 +166,8 @@ ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; GCN: $vgpr0_vgpr1 = SI_SPILL_V64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) - ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr2_vgpr3 - ; GCN: GLOBAL_STORE_DWORD killed renamable $vgpr0_vgpr1, undef renamable $vgpr3, 0, 0, 0, 0, implicit $exec - ; GCN: SI_SPILL_V64_SAVE killed $vgpr2_vgpr3, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 8 into %stack.1, align 4, addrspace 5) + ; GCN: undef renamable $vgpr3 = GLOBAL_LOAD_DWORD renamable $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit-def dead $vgpr2_vgpr3 + ; GCN: GLOBAL_STORE_DWORD renamable $vgpr0_vgpr1, undef renamable $vgpr1, 0, 0, 0, 0, implicit $exec ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec ; GCN: bb.2: ; GCN: S_ENDPGM 0 Index: llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -12,101 +12,96 @@ ; GCN: bb.0.entry: ; GCN: successors: 
%bb.1(0x80000000) ; GCN: liveins: $vgpr0, $sgpr0_sgpr1 + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.3, addrspace 5) ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4) - ; GCN: renamable $sgpr2 = COPY renamable $sgpr1 + ; GCN: renamable $sgpr6 = COPY renamable $sgpr1 ; GCN: renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 - ; GCN: renamable $sgpr1 = S_MOV_B32 61440 - ; GCN: renamable $sgpr3 = S_MOV_B32 -1 - ; GCN: undef renamable $sgpr4 = COPY killed renamable $sgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7 - ; GCN: renamable $sgpr5 = COPY killed renamable $sgpr2 - ; GCN: renamable $sgpr6 = COPY killed renamable $sgpr3 - ; GCN: renamable $sgpr7 = COPY killed renamable $sgpr1 + ; GCN: renamable $sgpr4 = S_MOV_B32 61440 + ; GCN: renamable $sgpr5 = S_MOV_B32 -1 + ; GCN: undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3 + ; GCN: renamable $sgpr1 = COPY killed renamable $sgpr6 + ; GCN: renamable $sgpr2 = COPY killed renamable $sgpr5 + ; GCN: renamable $sgpr3 = COPY killed renamable $sgpr4 + ; GCN: SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.2, align 4, addrspace 5) ; GCN: renamable $sgpr0 = S_MOV_B32 16 ; GCN: renamable $sgpr1 = S_MOV_B32 15 ; GCN: renamable $sgpr2 = S_MOV_B32 14 ; GCN: renamable $sgpr3 = S_MOV_B32 13 - ; GCN: renamable $sgpr8 = S_MOV_B32 12 - ; GCN: renamable $sgpr9 = S_MOV_B32 11 - ; GCN: renamable $sgpr10 = S_MOV_B32 10 - ; GCN: renamable $sgpr11 = S_MOV_B32 9 - ; GCN: renamable $sgpr12 = S_MOV_B32 8 - ; GCN: renamable $sgpr13 = S_MOV_B32 7 - ; GCN: renamable $sgpr14 = S_MOV_B32 6 - ; GCN: renamable $sgpr15 = S_MOV_B32 5 - ; GCN: renamable $sgpr16 = S_MOV_B32 
3 - ; GCN: renamable $sgpr17 = S_MOV_B32 2 - ; GCN: renamable $sgpr18 = S_MOV_B32 1 - ; GCN: renamable $sgpr19 = S_MOV_B32 0 - ; GCN: renamable $vgpr1 = COPY killed renamable $sgpr19 - ; GCN: renamable $vgpr2 = COPY killed renamable $sgpr18 - ; GCN: renamable $vgpr3 = COPY killed renamable $sgpr17 - ; GCN: renamable $vgpr4 = COPY killed renamable $sgpr16 - ; GCN: renamable $vgpr5 = COPY killed renamable $sgpr15 - ; GCN: renamable $vgpr6 = COPY killed renamable $sgpr14 - ; GCN: renamable $vgpr7 = COPY killed renamable $sgpr13 - ; GCN: renamable $vgpr8 = COPY killed renamable $sgpr12 - ; GCN: renamable $vgpr9 = COPY killed renamable $sgpr11 - ; GCN: renamable $vgpr10 = COPY killed renamable $sgpr10 - ; GCN: renamable $vgpr11 = COPY killed renamable $sgpr9 - ; GCN: renamable $vgpr12 = COPY killed renamable $sgpr8 - ; GCN: renamable $vgpr13 = COPY killed renamable $sgpr3 - ; GCN: renamable $vgpr14 = COPY killed renamable $sgpr2 - ; GCN: renamable $vgpr15 = COPY killed renamable $sgpr1 + ; GCN: renamable $sgpr4 = S_MOV_B32 12 + ; GCN: renamable $sgpr5 = S_MOV_B32 11 + ; GCN: renamable $sgpr6 = S_MOV_B32 10 + ; GCN: renamable $sgpr7 = S_MOV_B32 9 + ; GCN: renamable $sgpr8 = S_MOV_B32 8 + ; GCN: renamable $sgpr9 = S_MOV_B32 7 + ; GCN: renamable $sgpr10 = S_MOV_B32 6 + ; GCN: renamable $sgpr11 = S_MOV_B32 5 + ; GCN: renamable $sgpr12 = S_MOV_B32 3 + ; GCN: renamable $sgpr13 = S_MOV_B32 2 + ; GCN: renamable $sgpr14 = S_MOV_B32 1 + ; GCN: renamable $sgpr15 = S_MOV_B32 0 + ; GCN: renamable $vgpr0 = COPY killed renamable $sgpr15 + ; GCN: renamable $vgpr30 = COPY killed renamable $sgpr14 + ; GCN: renamable $vgpr29 = COPY killed renamable $sgpr13 + ; GCN: renamable $vgpr28 = COPY killed renamable $sgpr12 + ; GCN: renamable $vgpr27 = COPY killed renamable $sgpr11 + ; GCN: renamable $vgpr26 = COPY killed renamable $sgpr10 + ; GCN: renamable $vgpr25 = COPY killed renamable $sgpr9 + ; GCN: renamable $vgpr24 = COPY killed renamable $sgpr8 + ; GCN: renamable $vgpr23 = COPY killed 
renamable $sgpr7 + ; GCN: renamable $vgpr22 = COPY killed renamable $sgpr6 + ; GCN: renamable $vgpr21 = COPY killed renamable $sgpr5 + ; GCN: renamable $vgpr20 = COPY killed renamable $sgpr4 + ; GCN: renamable $vgpr19 = COPY killed renamable $sgpr3 + ; GCN: renamable $vgpr18 = COPY killed renamable $sgpr2 + ; GCN: renamable $vgpr17 = COPY killed renamable $sgpr1 ; GCN: renamable $vgpr16 = COPY killed renamable $sgpr0 - ; GCN: undef renamable $vgpr17 = COPY killed renamable $vgpr1, implicit-def $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 - ; GCN: renamable $vgpr18 = COPY killed renamable $vgpr2 - ; GCN: renamable $vgpr19 = COPY killed renamable $vgpr3 - ; GCN: renamable $vgpr20 = COPY killed renamable $vgpr4 - ; GCN: renamable $vgpr21 = COPY killed renamable $vgpr5 - ; GCN: renamable $vgpr22 = COPY killed renamable $vgpr6 - ; GCN: renamable $vgpr23 = COPY killed renamable $vgpr7 - ; GCN: renamable $vgpr24 = COPY killed renamable $vgpr8 - ; GCN: renamable $vgpr25 = COPY killed renamable $vgpr9 - ; GCN: renamable $vgpr26 = COPY killed renamable $vgpr10 - ; GCN: renamable $vgpr27 = COPY killed renamable $vgpr11 - ; GCN: renamable $vgpr28 = COPY killed renamable $vgpr12 - ; GCN: renamable $vgpr29 = COPY killed renamable $vgpr13 - ; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14 - ; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15 - ; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16 + ; GCN: undef renamable $vgpr0 = COPY killed renamable $vgpr0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; GCN: renamable $vgpr1 = COPY killed renamable $vgpr30 + ; GCN: renamable $vgpr2 = COPY killed renamable $vgpr29 + ; GCN: renamable $vgpr3 = COPY killed renamable $vgpr28 + ; GCN: renamable $vgpr4 = COPY killed renamable $vgpr27 + ; GCN: renamable $vgpr5 = COPY killed renamable $vgpr26 + ; GCN: renamable $vgpr6 = COPY killed 
renamable $vgpr25 + ; GCN: renamable $vgpr7 = COPY killed renamable $vgpr24 + ; GCN: renamable $vgpr8 = COPY killed renamable $vgpr23 + ; GCN: renamable $vgpr9 = COPY killed renamable $vgpr22 + ; GCN: renamable $vgpr10 = COPY killed renamable $vgpr21 + ; GCN: renamable $vgpr11 = COPY killed renamable $vgpr20 + ; GCN: renamable $vgpr12 = COPY killed renamable $vgpr19 + ; GCN: renamable $vgpr13 = COPY killed renamable $vgpr18 + ; GCN: renamable $vgpr14 = COPY killed renamable $vgpr17 + ; GCN: renamable $vgpr15 = COPY killed renamable $vgpr16 + ; GCN: SI_SPILL_V512_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.1, align 4, addrspace 5) ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec - ; GCN: renamable $vgpr1 = IMPLICIT_DEF - ; GCN: renamable $sgpr2_sgpr3 = IMPLICIT_DEF - ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; GCN: SI_SPILL_S128_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5) - ; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) - ; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit 
$sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) + ; GCN: renamable $vgpr0 = IMPLICIT_DEF + ; GCN: renamable $sgpr0_sgpr1 = IMPLICIT_DEF ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000) - ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.5, align 4, addrspace 5) - ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5) - ; GCN: $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec - ; GCN: renamable $sgpr4_sgpr5 = V_CMP_EQ_U32_e64 $sgpr2, killed $vgpr1, implicit $exec - ; GCN: renamable $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.4, align 4, addrspace 5) + ; GCN: $vgpr17 = SI_SPILL_V32_RESTORE %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.5, addrspace 5) + ; GCN: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = SI_SPILL_V512_RESTORE %stack.1, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 64 from %stack.1, align 4, addrspace 5) + ; GCN: $vgpr16 = SI_SPILL_V32_RESTORE %stack.3, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.3, addrspace 5) + ; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr16, implicit $exec + ; GCN: renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 
$sgpr2, $vgpr16, implicit $exec + ; GCN: renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec ; GCN: S_SET_GPR_IDX_ON killed renamable $sgpr2, 1, implicit-def $m0, implicit-def undef $mode, implicit $m0, implicit $mode - ; GCN: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = SI_SPILL_V512_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 64 from %stack.2, align 4, addrspace 5) - ; GCN: renamable $vgpr18 = V_MOV_B32_e32 $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0 + ; GCN: renamable $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $m0 + ; GCN: SI_SPILL_V32_SAVE $vgpr0, %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.6, addrspace 5) ; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode - ; GCN: renamable $vgpr19 = COPY renamable $vgpr18 - ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5 - ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.6, align 4, addrspace 5) - ; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) - ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) - ; GCN: SI_SPILL_V32_SAVE killed $vgpr18, %stack.8, 
$sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) - ; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.5, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.5, addrspace 5) + ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1 + ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.4, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.4, align 4, addrspace 5) + ; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc ; GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec ; GCN: bb.3: ; GCN: successors: %bb.2(0x80000000) - ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.3, align 4, addrspace 5) - ; GCN: $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1 + ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5) + ; GCN: $exec = S_MOV_B64 renamable $sgpr0_sgpr1 ; GCN: bb.2: - ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) - ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 16 from %stack.1, align 4, addrspace 5) - ; GCN: BUFFER_STORE_DWORD_OFFSET renamable $vgpr0, renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1) + ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.6, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (load 4 from %stack.6, addrspace 5) + ; GCN: $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.2, implicit $exec, implicit 
$sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 16 from %stack.2, align 4, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1) ; GCN: S_ENDPGM 0 entry: %id = call i32 @llvm.amdgcn.workitem.id.x() #1 Index: llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll +++ llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll @@ -236,17 +236,18 @@ ; W64-O0-DAG: s_mov_b32 [[IDX_S:s[0-9]+]], s{{[0-9]+}} ; W64-O0-DAG: v_mov_b32_e32 [[IDX_V:v[0-9]+]], s{{[0-9]+}} ; W64-O0-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec -; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill +; W64-O0-DAG: buffer_store_dword [[IDX_V]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill -; W64-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]: -; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload +; W64-O0: [[LOOPBB0:BB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1 +; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, 
s[0:3], s32 offset:40 ; 4-byte Folded Reload ; W64-O0: s_waitcnt vmcnt(0) -; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP0:[0-9]+]], v[[VRSRC0]] +; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]] ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[SRSRCTMP0]] +; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]] ; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]] ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}} ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]] @@ -255,37 +256,37 @@ ; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]] ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}} ; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]] -; W64-O0-DAG: s_mov_b32 s[[S0:[0-9]+]], s[[SRSRCTMP0]] ; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]] ; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]] ; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]] ; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF]] ; 4-byte Folded Reload ; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[S0]]:[[S3]]{{\]}}, {{.*}} idxen ; W64-O0: s_waitcnt vmcnt(0) ; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill ; W64-O0: s_xor_b64 exec, exec, [[SAVE]] ; W64-O0-NEXT: s_cbranch_execnz [[LOOPBB0]] -; CHECK-O0: s_mov_b64 exec, [[SAVEEXEC]] + +; XXX-W64-O0: s_mov_b64 exec, [[SAVEEXEC]] ; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload ; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} 
offset:[[RES_OFF:[0-9]+]] ; 4-byte Folded Spill ; W64-O0: s_cbranch_execz [[TERMBB:BB[0-9]+_[0-9]+]] -; W64-O0: ; %bb.{{[0-9]+}}: +; W64-O0: ; %bb.{{[0-9]+}}: ; %bb1 +; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill ; W64-O0-DAG: s_mov_b64 s{{\[}}[[SAVEEXEC0:[0-9]+]]:[[SAVEEXEC1:[0-9]+]]{{\]}}, exec -; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill ; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC0]], [[SAVEEXEC_IDX0:[0-9]+]] -; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]] +; W64-O0: v_writelane_b32 [[VSAVEEXEC]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]] -; W64-O0: [[LOOPBB1:BB[0-9]+_[0-9]+]]: -; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload -; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload +; W64-O0: [[LOOPBB1:BB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1 +; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF]] ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload +; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload ; W64-O0: s_waitcnt vmcnt(0) -; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP0:[0-9]+]], v[[VRSRC0]] +; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]] ; W64-O0-DAG: 
v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]] -; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[SRSRCTMP0]] +; W64-O0-DAG: s_mov_b32 s[[SRSRC0:[0-9]+]], s[[S0]] ; W64-O0-DAG: s_mov_b32 s[[SRSRC1:[0-9]+]], s[[SRSRCTMP1]] ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC0]]:[[SRSRC1]]{{\]}}, v{{\[}}[[VRSRC0]]:[[VRSRC1]]{{\]}} ; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP2:[0-9]+]], v[[VRSRC2]] @@ -294,12 +295,10 @@ ; W64-O0-DAG: s_mov_b32 s[[SRSRC3:[0-9]+]], s[[SRSRCTMP3]] ; W64-O0-DAG: v_cmp_eq_u64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], s{{\[}}[[SRSRC2]]:[[SRSRC3]]{{\]}}, v{{\[}}[[VRSRC2]]:[[VRSRC3]]{{\]}} ; W64-O0-DAG: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]] -; W64-O0-DAG: s_mov_b32 s[[S0:[0-9]+]], s[[SRSRCTMP0]] ; W64-O0-DAG: s_mov_b32 s[[S1:[0-9]+]], s[[SRSRCTMP1]] ; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]] ; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]] ; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]] -; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[IDX_OFF]] ; 4-byte Folded Reload ; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s{{\[}}[[S0]]:[[S3]]{{\]}}, {{.*}} idxen ; W64-O0: s_waitcnt vmcnt(0) ; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill Index: llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -15,381 +15,379 @@ ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 0 +; GCN-NEXT: v_writelane_b32 v0, s5, 1 +; GCN-NEXT: v_writelane_b32 v0, s6, 2 +; GCN-NEXT: v_writelane_b32 v0, s7, 3 +; GCN-NEXT: v_writelane_b32 v0, s8, 4 +; GCN-NEXT: v_writelane_b32 v0, s9, 5 +; GCN-NEXT: v_writelane_b32 v0, 
s10, 6 +; GCN-NEXT: v_writelane_b32 v0, s11, 7 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[12:19] +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 8 +; GCN-NEXT: v_writelane_b32 v0, s5, 9 +; GCN-NEXT: v_writelane_b32 v0, s6, 10 +; GCN-NEXT: v_writelane_b32 v0, s7, 11 +; GCN-NEXT: v_writelane_b32 v0, s8, 12 +; GCN-NEXT: v_writelane_b32 v0, s9, 13 +; GCN-NEXT: v_writelane_b32 v0, s10, 14 +; GCN-NEXT: v_writelane_b32 v0, s11, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 16 +; GCN-NEXT: v_writelane_b32 v0, s5, 17 +; GCN-NEXT: v_writelane_b32 v0, s6, 18 +; GCN-NEXT: v_writelane_b32 v0, s7, 19 +; GCN-NEXT: v_writelane_b32 v0, s8, 20 +; GCN-NEXT: v_writelane_b32 v0, s9, 21 +; GCN-NEXT: v_writelane_b32 v0, s10, 22 +; GCN-NEXT: v_writelane_b32 v0, s11, 23 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 24 +; GCN-NEXT: v_writelane_b32 v0, s5, 25 +; GCN-NEXT: v_writelane_b32 v0, s6, 26 +; GCN-NEXT: v_writelane_b32 v0, s7, 27 +; GCN-NEXT: v_writelane_b32 v0, s8, 28 +; GCN-NEXT: v_writelane_b32 v0, s9, 29 +; GCN-NEXT: v_writelane_b32 v0, s10, 30 +; GCN-NEXT: v_writelane_b32 v0, s11, 31 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[20:27] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 32 +; GCN-NEXT: v_writelane_b32 v0, s5, 33 +; GCN-NEXT: v_writelane_b32 v0, s6, 34 +; GCN-NEXT: v_writelane_b32 v0, s7, 35 +; GCN-NEXT: v_writelane_b32 v0, s8, 36 +; GCN-NEXT: v_writelane_b32 v0, s9, 37 +; GCN-NEXT: v_writelane_b32 v0, s10, 38 +; GCN-NEXT: v_writelane_b32 v0, s11, 39 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[36:43] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 40 +; GCN-NEXT: v_writelane_b32 v0, s5, 41 +; GCN-NEXT: v_writelane_b32 v0, s6, 42 +; GCN-NEXT: v_writelane_b32 v0, s7, 43 +; GCN-NEXT: v_writelane_b32 v0, s8, 44 +; GCN-NEXT: 
v_writelane_b32 v0, s9, 45 +; GCN-NEXT: v_writelane_b32 v0, s10, 46 +; GCN-NEXT: v_writelane_b32 v0, s11, 47 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[44:51] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 48 +; GCN-NEXT: v_writelane_b32 v0, s5, 49 +; GCN-NEXT: v_writelane_b32 v0, s6, 50 +; GCN-NEXT: v_writelane_b32 v0, s7, 51 +; GCN-NEXT: v_writelane_b32 v0, s8, 52 +; GCN-NEXT: v_writelane_b32 v0, s9, 53 +; GCN-NEXT: v_writelane_b32 v0, s10, 54 +; GCN-NEXT: v_writelane_b32 v0, s11, 55 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[52:59] +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 56 +; GCN-NEXT: v_writelane_b32 v0, s5, 57 +; GCN-NEXT: v_writelane_b32 v0, s6, 58 +; GCN-NEXT: v_writelane_b32 v0, s7, 59 +; GCN-NEXT: v_writelane_b32 v0, s8, 60 +; GCN-NEXT: v_writelane_b32 v0, s9, 61 +; GCN-NEXT: v_writelane_b32 v0, s10, 62 +; GCN-NEXT: v_writelane_b32 v0, s11, 63 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s5, 1 +; GCN-NEXT: v_writelane_b32 v1, s6, 2 +; GCN-NEXT: v_writelane_b32 v1, s7, 3 +; GCN-NEXT: v_writelane_b32 v1, s8, 4 +; GCN-NEXT: v_writelane_b32 v1, s9, 5 +; GCN-NEXT: v_writelane_b32 v1, s10, 6 +; GCN-NEXT: v_writelane_b32 v1, s11, 7 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[60:67] +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 8 +; GCN-NEXT: v_writelane_b32 v1, s5, 9 +; GCN-NEXT: v_writelane_b32 v1, s6, 10 +; GCN-NEXT: v_writelane_b32 v1, s7, 11 +; GCN-NEXT: v_writelane_b32 v1, s8, 12 +; GCN-NEXT: v_writelane_b32 v1, s9, 13 +; GCN-NEXT: v_writelane_b32 v1, s10, 14 +; GCN-NEXT: v_writelane_b32 v1, s11, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 16 +; GCN-NEXT: v_writelane_b32 v1, s5, 17 +; GCN-NEXT: v_writelane_b32 v1, s6, 18 +; GCN-NEXT: v_writelane_b32 v1, s7, 
19 +; GCN-NEXT: v_writelane_b32 v1, s8, 20 +; GCN-NEXT: v_writelane_b32 v1, s9, 21 +; GCN-NEXT: v_writelane_b32 v1, s10, 22 +; GCN-NEXT: v_writelane_b32 v1, s11, 23 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 24 +; GCN-NEXT: v_writelane_b32 v1, s5, 25 +; GCN-NEXT: v_writelane_b32 v1, s6, 26 +; GCN-NEXT: v_writelane_b32 v1, s7, 27 +; GCN-NEXT: v_writelane_b32 v1, s8, 28 +; GCN-NEXT: v_writelane_b32 v1, s9, 29 +; GCN-NEXT: v_writelane_b32 v1, s10, 30 +; GCN-NEXT: v_writelane_b32 v1, s11, 31 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 32 +; GCN-NEXT: v_writelane_b32 v1, s5, 33 +; GCN-NEXT: v_writelane_b32 v1, s6, 34 +; GCN-NEXT: v_writelane_b32 v1, s7, 35 +; GCN-NEXT: v_writelane_b32 v1, s8, 36 +; GCN-NEXT: v_writelane_b32 v1, s9, 37 +; GCN-NEXT: v_writelane_b32 v1, s10, 38 +; GCN-NEXT: v_writelane_b32 v1, s11, 39 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 40 +; GCN-NEXT: v_writelane_b32 v1, s5, 41 +; GCN-NEXT: v_writelane_b32 v1, s6, 42 +; GCN-NEXT: v_writelane_b32 v1, s7, 43 +; GCN-NEXT: v_writelane_b32 v1, s8, 44 +; GCN-NEXT: v_writelane_b32 v1, s9, 45 +; GCN-NEXT: v_writelane_b32 v1, s10, 46 +; GCN-NEXT: v_writelane_b32 v1, s11, 47 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[68:75] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 48 +; GCN-NEXT: v_writelane_b32 v1, s5, 49 +; GCN-NEXT: v_writelane_b32 v1, s6, 50 +; GCN-NEXT: v_writelane_b32 v1, s7, 51 +; GCN-NEXT: v_writelane_b32 v1, s8, 52 +; GCN-NEXT: v_writelane_b32 v1, s9, 53 +; GCN-NEXT: v_writelane_b32 v1, s10, 54 +; GCN-NEXT: v_writelane_b32 v1, s11, 55 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[76:83] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 56 +; GCN-NEXT: v_writelane_b32 v1, s5, 57 +; GCN-NEXT: v_writelane_b32 v1, s6, 58 
+; GCN-NEXT: v_writelane_b32 v1, s7, 59 +; GCN-NEXT: v_writelane_b32 v1, s8, 60 +; GCN-NEXT: v_writelane_b32 v1, s9, 61 +; GCN-NEXT: v_writelane_b32 v1, s10, 62 +; GCN-NEXT: v_writelane_b32 v1, s11, 63 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[84:91] +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v2, s4, 0 +; GCN-NEXT: v_writelane_b32 v2, s5, 1 +; GCN-NEXT: v_writelane_b32 v2, s6, 2 +; GCN-NEXT: v_writelane_b32 v2, s7, 3 +; GCN-NEXT: v_writelane_b32 v2, s8, 4 +; GCN-NEXT: v_writelane_b32 v2, s9, 5 +; GCN-NEXT: v_writelane_b32 v2, s10, 6 +; GCN-NEXT: v_writelane_b32 v2, s11, 7 +; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-NEXT: v_writelane_b32 v0, s4, 1 -; GCN-NEXT: v_writelane_b32 v0, s5, 2 -; GCN-NEXT: v_writelane_b32 v0, s6, 3 -; GCN-NEXT: v_writelane_b32 v0, s7, 4 -; GCN-NEXT: v_writelane_b32 v0, s8, 5 -; GCN-NEXT: v_writelane_b32 v0, s9, 6 -; GCN-NEXT: v_writelane_b32 v0, s10, 7 -; GCN-NEXT: v_writelane_b32 v0, s11, 8 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 9 -; GCN-NEXT: v_writelane_b32 v0, s1, 10 -; GCN-NEXT: v_writelane_b32 v0, s2, 11 -; GCN-NEXT: v_writelane_b32 v0, s3, 12 -; GCN-NEXT: v_writelane_b32 v0, s4, 13 -; GCN-NEXT: v_writelane_b32 v0, s5, 14 -; GCN-NEXT: v_writelane_b32 v0, s6, 15 -; GCN-NEXT: v_writelane_b32 v0, s7, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 17 -; GCN-NEXT: v_writelane_b32 v0, s1, 18 -; GCN-NEXT: v_writelane_b32 v0, s2, 19 -; GCN-NEXT: v_writelane_b32 v0, s3, 20 -; GCN-NEXT: v_writelane_b32 v0, s4, 21 -; GCN-NEXT: v_writelane_b32 v0, s5, 22 -; GCN-NEXT: v_writelane_b32 v0, s6, 23 -; GCN-NEXT: v_writelane_b32 v0, s7, 24 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 25 -; GCN-NEXT: v_writelane_b32 v0, s1, 26 -; GCN-NEXT: 
v_writelane_b32 v0, s2, 27 -; GCN-NEXT: v_writelane_b32 v0, s3, 28 -; GCN-NEXT: v_writelane_b32 v0, s4, 29 -; GCN-NEXT: v_writelane_b32 v0, s5, 30 -; GCN-NEXT: v_writelane_b32 v0, s6, 31 -; GCN-NEXT: v_writelane_b32 v0, s7, 32 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 33 -; GCN-NEXT: v_writelane_b32 v0, s1, 34 -; GCN-NEXT: v_writelane_b32 v0, s2, 35 -; GCN-NEXT: v_writelane_b32 v0, s3, 36 -; GCN-NEXT: v_writelane_b32 v0, s4, 37 -; GCN-NEXT: v_writelane_b32 v0, s5, 38 -; GCN-NEXT: v_writelane_b32 v0, s6, 39 -; GCN-NEXT: v_writelane_b32 v0, s7, 40 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 41 -; GCN-NEXT: v_writelane_b32 v0, s1, 42 -; GCN-NEXT: v_writelane_b32 v0, s2, 43 -; GCN-NEXT: v_writelane_b32 v0, s3, 44 -; GCN-NEXT: v_writelane_b32 v0, s4, 45 -; GCN-NEXT: v_writelane_b32 v0, s5, 46 -; GCN-NEXT: v_writelane_b32 v0, s6, 47 -; GCN-NEXT: v_writelane_b32 v0, s7, 48 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 49 -; GCN-NEXT: v_writelane_b32 v0, s1, 50 -; GCN-NEXT: v_writelane_b32 v0, s2, 51 -; GCN-NEXT: v_writelane_b32 v0, s3, 52 -; GCN-NEXT: v_writelane_b32 v0, s4, 53 -; GCN-NEXT: v_writelane_b32 v0, s5, 54 -; GCN-NEXT: v_writelane_b32 v0, s6, 55 -; GCN-NEXT: v_writelane_b32 v0, s7, 56 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s8, 0 -; GCN-NEXT: v_readlane_b32 s9, v0, 0 -; GCN-NEXT: s_cmp_lg_u32 s9, s8 -; GCN-NEXT: v_writelane_b32 v0, s12, 57 -; GCN-NEXT: v_writelane_b32 v0, s13, 58 -; GCN-NEXT: v_writelane_b32 v0, s14, 59 -; GCN-NEXT: v_writelane_b32 v0, s15, 60 -; GCN-NEXT: v_writelane_b32 v0, s16, 61 -; GCN-NEXT: v_writelane_b32 v0, s17, 62 -; GCN-NEXT: v_writelane_b32 v0, s18, 63 -; GCN-NEXT: v_writelane_b32 v1, s19, 0 -; GCN-NEXT: v_writelane_b32 v1, s20, 1 -; GCN-NEXT: v_writelane_b32 v1, s21, 2 -; 
GCN-NEXT: v_writelane_b32 v1, s22, 3 -; GCN-NEXT: v_writelane_b32 v1, s23, 4 -; GCN-NEXT: v_writelane_b32 v1, s24, 5 -; GCN-NEXT: v_writelane_b32 v1, s25, 6 -; GCN-NEXT: v_writelane_b32 v1, s26, 7 -; GCN-NEXT: v_writelane_b32 v1, s27, 8 -; GCN-NEXT: v_writelane_b32 v1, s36, 9 -; GCN-NEXT: v_writelane_b32 v1, s37, 10 -; GCN-NEXT: v_writelane_b32 v1, s38, 11 -; GCN-NEXT: v_writelane_b32 v1, s39, 12 -; GCN-NEXT: v_writelane_b32 v1, s40, 13 -; GCN-NEXT: v_writelane_b32 v1, s41, 14 -; GCN-NEXT: v_writelane_b32 v1, s42, 15 -; GCN-NEXT: v_writelane_b32 v1, s43, 16 -; GCN-NEXT: v_writelane_b32 v1, s44, 17 -; GCN-NEXT: v_writelane_b32 v1, s45, 18 -; GCN-NEXT: v_writelane_b32 v1, s46, 19 -; GCN-NEXT: v_writelane_b32 v1, s47, 20 -; GCN-NEXT: v_writelane_b32 v1, s48, 21 -; GCN-NEXT: v_writelane_b32 v1, s49, 22 -; GCN-NEXT: v_writelane_b32 v1, s50, 23 -; GCN-NEXT: v_writelane_b32 v1, s51, 24 -; GCN-NEXT: v_writelane_b32 v1, s52, 25 -; GCN-NEXT: v_writelane_b32 v1, s53, 26 -; GCN-NEXT: v_writelane_b32 v1, s54, 27 -; GCN-NEXT: v_writelane_b32 v1, s55, 28 -; GCN-NEXT: v_writelane_b32 v1, s56, 29 -; GCN-NEXT: v_writelane_b32 v1, s57, 30 -; GCN-NEXT: v_writelane_b32 v1, s58, 31 -; GCN-NEXT: v_writelane_b32 v1, s59, 32 -; GCN-NEXT: v_writelane_b32 v1, s60, 33 -; GCN-NEXT: v_writelane_b32 v1, s61, 34 -; GCN-NEXT: v_writelane_b32 v1, s62, 35 -; GCN-NEXT: v_writelane_b32 v1, s63, 36 -; GCN-NEXT: v_writelane_b32 v1, s64, 37 -; GCN-NEXT: v_writelane_b32 v1, s65, 38 -; GCN-NEXT: v_writelane_b32 v1, s66, 39 -; GCN-NEXT: v_writelane_b32 v1, s67, 40 -; GCN-NEXT: v_writelane_b32 v1, s68, 41 -; GCN-NEXT: v_writelane_b32 v1, s69, 42 -; GCN-NEXT: v_writelane_b32 v1, s70, 43 -; GCN-NEXT: v_writelane_b32 v1, s71, 44 -; GCN-NEXT: v_writelane_b32 v1, s72, 45 -; GCN-NEXT: v_writelane_b32 v1, s73, 46 -; GCN-NEXT: v_writelane_b32 v1, s74, 47 -; GCN-NEXT: v_writelane_b32 v1, s75, 48 -; GCN-NEXT: v_writelane_b32 v1, s76, 49 -; GCN-NEXT: v_writelane_b32 v1, s77, 50 -; GCN-NEXT: v_writelane_b32 v1, s78, 51 
-; GCN-NEXT: v_writelane_b32 v1, s79, 52 -; GCN-NEXT: v_writelane_b32 v1, s80, 53 -; GCN-NEXT: v_writelane_b32 v1, s81, 54 -; GCN-NEXT: v_writelane_b32 v1, s82, 55 -; GCN-NEXT: v_writelane_b32 v1, s83, 56 -; GCN-NEXT: v_writelane_b32 v1, s84, 57 -; GCN-NEXT: v_writelane_b32 v1, s85, 58 -; GCN-NEXT: v_writelane_b32 v1, s86, 59 -; GCN-NEXT: v_writelane_b32 v1, s87, 60 -; GCN-NEXT: v_writelane_b32 v1, s88, 61 -; GCN-NEXT: v_writelane_b32 v1, s89, 62 -; GCN-NEXT: v_writelane_b32 v1, s90, 63 -; GCN-NEXT: v_writelane_b32 v2, s91, 0 -; GCN-NEXT: v_writelane_b32 v2, s0, 1 -; GCN-NEXT: v_writelane_b32 v2, s1, 2 -; GCN-NEXT: v_writelane_b32 v2, s2, 3 -; GCN-NEXT: v_writelane_b32 v2, s3, 4 -; GCN-NEXT: v_writelane_b32 v2, s4, 5 -; GCN-NEXT: v_writelane_b32 v2, s5, 6 -; GCN-NEXT: v_writelane_b32 v2, s6, 7 -; GCN-NEXT: v_writelane_b32 v2, s7, 8 +; GCN-NEXT: s_cmp_lg_u32 s0, s1 ; GCN-NEXT: s_cbranch_scc1 BB0_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 1 -; GCN-NEXT: v_readlane_b32 s1, v0, 2 -; GCN-NEXT: v_readlane_b32 s2, v0, 3 -; GCN-NEXT: v_readlane_b32 s3, v0, 4 -; GCN-NEXT: v_readlane_b32 s4, v0, 5 -; GCN-NEXT: v_readlane_b32 s5, v0, 6 -; GCN-NEXT: v_readlane_b32 s6, v0, 7 -; GCN-NEXT: v_readlane_b32 s7, v0, 8 +; GCN-NEXT: v_readlane_b32 s8, v1, 56 +; GCN-NEXT: v_readlane_b32 s9, v1, 57 +; GCN-NEXT: v_readlane_b32 s10, v1, 58 +; GCN-NEXT: v_readlane_b32 s11, v1, 59 +; GCN-NEXT: v_readlane_b32 s12, v1, 60 +; GCN-NEXT: v_readlane_b32 s13, v1, 61 +; GCN-NEXT: v_readlane_b32 s14, v1, 62 +; GCN-NEXT: v_readlane_b32 s15, v1, 63 +; GCN-NEXT: v_readlane_b32 s16, v1, 48 +; GCN-NEXT: v_readlane_b32 s17, v1, 49 +; GCN-NEXT: v_readlane_b32 s18, v1, 50 +; GCN-NEXT: v_readlane_b32 s19, v1, 51 +; GCN-NEXT: v_readlane_b32 s20, v1, 52 +; GCN-NEXT: v_readlane_b32 s21, v1, 53 +; GCN-NEXT: v_readlane_b32 s22, v1, 54 +; GCN-NEXT: v_readlane_b32 s23, v1, 55 +; GCN-NEXT: v_readlane_b32 s24, v1, 40 +; GCN-NEXT: v_readlane_b32 s25, v1, 41 +; GCN-NEXT: v_readlane_b32 s26, v1, 42 
+; GCN-NEXT: v_readlane_b32 s27, v1, 43 +; GCN-NEXT: v_readlane_b32 s28, v1, 44 +; GCN-NEXT: v_readlane_b32 s29, v1, 45 +; GCN-NEXT: v_readlane_b32 s30, v1, 46 +; GCN-NEXT: v_readlane_b32 s31, v1, 47 +; GCN-NEXT: v_readlane_b32 s36, v1, 32 +; GCN-NEXT: v_readlane_b32 s37, v1, 33 +; GCN-NEXT: v_readlane_b32 s38, v1, 34 +; GCN-NEXT: v_readlane_b32 s39, v1, 35 +; GCN-NEXT: v_readlane_b32 s40, v1, 36 +; GCN-NEXT: v_readlane_b32 s41, v1, 37 +; GCN-NEXT: v_readlane_b32 s42, v1, 38 +; GCN-NEXT: v_readlane_b32 s43, v1, 39 +; GCN-NEXT: v_readlane_b32 s44, v1, 24 +; GCN-NEXT: v_readlane_b32 s45, v1, 25 +; GCN-NEXT: v_readlane_b32 s46, v1, 26 +; GCN-NEXT: v_readlane_b32 s47, v1, 27 +; GCN-NEXT: v_readlane_b32 s48, v1, 28 +; GCN-NEXT: v_readlane_b32 s49, v1, 29 +; GCN-NEXT: v_readlane_b32 s50, v1, 30 +; GCN-NEXT: v_readlane_b32 s51, v1, 31 +; GCN-NEXT: v_readlane_b32 s52, v1, 16 +; GCN-NEXT: v_readlane_b32 s53, v1, 17 +; GCN-NEXT: v_readlane_b32 s54, v1, 18 +; GCN-NEXT: v_readlane_b32 s55, v1, 19 +; GCN-NEXT: v_readlane_b32 s56, v1, 20 +; GCN-NEXT: v_readlane_b32 s57, v1, 21 +; GCN-NEXT: v_readlane_b32 s58, v1, 22 +; GCN-NEXT: v_readlane_b32 s59, v1, 23 +; GCN-NEXT: v_readlane_b32 s60, v1, 8 +; GCN-NEXT: v_readlane_b32 s61, v1, 9 +; GCN-NEXT: v_readlane_b32 s62, v1, 10 +; GCN-NEXT: v_readlane_b32 s63, v1, 11 +; GCN-NEXT: v_readlane_b32 s64, v1, 12 +; GCN-NEXT: v_readlane_b32 s65, v1, 13 +; GCN-NEXT: v_readlane_b32 s66, v1, 14 +; GCN-NEXT: v_readlane_b32 s67, v1, 15 +; GCN-NEXT: v_readlane_b32 s68, v1, 0 +; GCN-NEXT: v_readlane_b32 s69, v1, 1 +; GCN-NEXT: v_readlane_b32 s70, v1, 2 +; GCN-NEXT: v_readlane_b32 s71, v1, 3 +; GCN-NEXT: v_readlane_b32 s72, v1, 4 +; GCN-NEXT: v_readlane_b32 s73, v1, 5 +; GCN-NEXT: v_readlane_b32 s74, v1, 6 +; GCN-NEXT: v_readlane_b32 s75, v1, 7 +; GCN-NEXT: v_readlane_b32 s76, v0, 56 +; GCN-NEXT: v_readlane_b32 s77, v0, 57 +; GCN-NEXT: v_readlane_b32 s78, v0, 58 +; GCN-NEXT: v_readlane_b32 s79, v0, 59 +; GCN-NEXT: v_readlane_b32 s80, v0, 60 +; 
GCN-NEXT: v_readlane_b32 s81, v0, 61 +; GCN-NEXT: v_readlane_b32 s82, v0, 62 +; GCN-NEXT: v_readlane_b32 s83, v0, 63 +; GCN-NEXT: v_readlane_b32 s84, v0, 48 +; GCN-NEXT: v_readlane_b32 s85, v0, 49 +; GCN-NEXT: v_readlane_b32 s86, v0, 50 +; GCN-NEXT: v_readlane_b32 s87, v0, 51 +; GCN-NEXT: v_readlane_b32 s88, v0, 52 +; GCN-NEXT: v_readlane_b32 s89, v0, 53 +; GCN-NEXT: v_readlane_b32 s90, v0, 54 +; GCN-NEXT: v_readlane_b32 s91, v0, 55 +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s2, v0, 2 +; GCN-NEXT: v_readlane_b32 s3, v0, 3 +; GCN-NEXT: v_readlane_b32 s4, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v0, 5 +; GCN-NEXT: v_readlane_b32 s6, v0, 6 +; GCN-NEXT: v_readlane_b32 s7, v0, 7 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 57 -; GCN-NEXT: v_readlane_b32 s1, v0, 58 -; GCN-NEXT: v_readlane_b32 s2, v0, 59 -; GCN-NEXT: v_readlane_b32 s3, v0, 60 -; GCN-NEXT: v_readlane_b32 s4, v0, 61 -; GCN-NEXT: v_readlane_b32 s5, v0, 62 -; GCN-NEXT: v_readlane_b32 s6, v0, 63 -; GCN-NEXT: v_readlane_b32 s7, v1, 0 +; GCN-NEXT: v_readlane_b32 s0, v0, 8 +; GCN-NEXT: v_readlane_b32 s1, v0, 9 +; GCN-NEXT: v_readlane_b32 s2, v0, 10 +; GCN-NEXT: v_readlane_b32 s3, v0, 11 +; GCN-NEXT: v_readlane_b32 s4, v0, 12 +; GCN-NEXT: v_readlane_b32 s5, v0, 13 +; GCN-NEXT: v_readlane_b32 s6, v0, 14 +; GCN-NEXT: v_readlane_b32 s7, v0, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 1 -; GCN-NEXT: v_readlane_b32 s1, v1, 2 -; GCN-NEXT: v_readlane_b32 s2, v1, 3 -; GCN-NEXT: v_readlane_b32 s3, v1, 4 -; GCN-NEXT: v_readlane_b32 s4, v1, 5 -; GCN-NEXT: v_readlane_b32 s5, v1, 6 -; GCN-NEXT: v_readlane_b32 s6, v1, 7 -; GCN-NEXT: v_readlane_b32 s7, v1, 8 +; GCN-NEXT: v_readlane_b32 s0, v0, 16 +; GCN-NEXT: v_readlane_b32 s1, v0, 17 +; GCN-NEXT: v_readlane_b32 s2, v0, 18 +; GCN-NEXT: v_readlane_b32 s3, v0, 19 +; GCN-NEXT: v_readlane_b32 s4, v0, 
20 +; GCN-NEXT: v_readlane_b32 s5, v0, 21 +; GCN-NEXT: v_readlane_b32 s6, v0, 22 +; GCN-NEXT: v_readlane_b32 s7, v0, 23 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 9 -; GCN-NEXT: v_readlane_b32 s1, v1, 10 -; GCN-NEXT: v_readlane_b32 s2, v1, 11 -; GCN-NEXT: v_readlane_b32 s3, v1, 12 -; GCN-NEXT: v_readlane_b32 s4, v1, 13 -; GCN-NEXT: v_readlane_b32 s5, v1, 14 -; GCN-NEXT: v_readlane_b32 s6, v1, 15 -; GCN-NEXT: v_readlane_b32 s7, v1, 16 +; GCN-NEXT: v_readlane_b32 s0, v0, 24 +; GCN-NEXT: v_readlane_b32 s1, v0, 25 +; GCN-NEXT: v_readlane_b32 s2, v0, 26 +; GCN-NEXT: v_readlane_b32 s3, v0, 27 +; GCN-NEXT: v_readlane_b32 s4, v0, 28 +; GCN-NEXT: v_readlane_b32 s5, v0, 29 +; GCN-NEXT: v_readlane_b32 s6, v0, 30 +; GCN-NEXT: v_readlane_b32 s7, v0, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 17 -; GCN-NEXT: v_readlane_b32 s1, v1, 18 -; GCN-NEXT: v_readlane_b32 s2, v1, 19 -; GCN-NEXT: v_readlane_b32 s3, v1, 20 -; GCN-NEXT: v_readlane_b32 s4, v1, 21 -; GCN-NEXT: v_readlane_b32 s5, v1, 22 -; GCN-NEXT: v_readlane_b32 s6, v1, 23 -; GCN-NEXT: v_readlane_b32 s7, v1, 24 +; GCN-NEXT: v_readlane_b32 s0, v0, 32 +; GCN-NEXT: v_readlane_b32 s1, v0, 33 +; GCN-NEXT: v_readlane_b32 s2, v0, 34 +; GCN-NEXT: v_readlane_b32 s3, v0, 35 +; GCN-NEXT: v_readlane_b32 s4, v0, 36 +; GCN-NEXT: v_readlane_b32 s5, v0, 37 +; GCN-NEXT: v_readlane_b32 s6, v0, 38 +; GCN-NEXT: v_readlane_b32 s7, v0, 39 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 25 -; GCN-NEXT: v_readlane_b32 s1, v1, 26 -; GCN-NEXT: v_readlane_b32 s2, v1, 27 -; GCN-NEXT: v_readlane_b32 s3, v1, 28 -; GCN-NEXT: v_readlane_b32 s4, v1, 29 -; GCN-NEXT: v_readlane_b32 s5, v1, 30 -; GCN-NEXT: v_readlane_b32 s6, v1, 31 -; GCN-NEXT: v_readlane_b32 s7, v1, 32 +; GCN-NEXT: v_readlane_b32 s0, v0, 40 +; GCN-NEXT: v_readlane_b32 s1, v0, 41 +; GCN-NEXT: v_readlane_b32 
s2, v0, 42 +; GCN-NEXT: v_readlane_b32 s3, v0, 43 +; GCN-NEXT: v_readlane_b32 s4, v0, 44 +; GCN-NEXT: v_readlane_b32 s5, v0, 45 +; GCN-NEXT: v_readlane_b32 s6, v0, 46 +; GCN-NEXT: v_readlane_b32 s7, v0, 47 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 33 -; GCN-NEXT: v_readlane_b32 s1, v1, 34 -; GCN-NEXT: v_readlane_b32 s2, v1, 35 -; GCN-NEXT: v_readlane_b32 s3, v1, 36 -; GCN-NEXT: v_readlane_b32 s4, v1, 37 -; GCN-NEXT: v_readlane_b32 s5, v1, 38 -; GCN-NEXT: v_readlane_b32 s6, v1, 39 -; GCN-NEXT: v_readlane_b32 s7, v1, 40 +; GCN-NEXT: v_readlane_b32 s0, v2, 0 +; GCN-NEXT: v_readlane_b32 s1, v2, 1 +; GCN-NEXT: v_readlane_b32 s2, v2, 2 +; GCN-NEXT: v_readlane_b32 s3, v2, 3 +; GCN-NEXT: v_readlane_b32 s4, v2, 4 +; GCN-NEXT: v_readlane_b32 s5, v2, 5 +; GCN-NEXT: v_readlane_b32 s6, v2, 6 +; GCN-NEXT: v_readlane_b32 s7, v2, 7 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[84:91] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 41 -; GCN-NEXT: v_readlane_b32 s1, v1, 42 -; GCN-NEXT: v_readlane_b32 s2, v1, 43 -; GCN-NEXT: v_readlane_b32 s3, v1, 44 -; GCN-NEXT: v_readlane_b32 s4, v1, 45 -; GCN-NEXT: v_readlane_b32 s5, v1, 46 -; GCN-NEXT: v_readlane_b32 s6, v1, 47 -; GCN-NEXT: v_readlane_b32 s7, v1, 48 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[76:83] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 49 -; GCN-NEXT: v_readlane_b32 s1, v1, 50 -; GCN-NEXT: v_readlane_b32 s2, v1, 51 -; GCN-NEXT: v_readlane_b32 s3, v1, 52 -; GCN-NEXT: v_readlane_b32 s4, v1, 53 -; GCN-NEXT: v_readlane_b32 s5, v1, 54 -; GCN-NEXT: v_readlane_b32 s6, v1, 55 -; GCN-NEXT: v_readlane_b32 s7, v1, 56 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[68:75] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 57 -; GCN-NEXT: v_readlane_b32 s1, v1, 58 -; GCN-NEXT: v_readlane_b32 s2, v1, 59 -; GCN-NEXT: v_readlane_b32 s3, v1, 60 -; GCN-NEXT: 
v_readlane_b32 s4, v1, 61 -; GCN-NEXT: v_readlane_b32 s5, v1, 62 -; GCN-NEXT: v_readlane_b32 s6, v1, 63 -; GCN-NEXT: v_readlane_b32 s7, v2, 0 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[60:67] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 9 -; GCN-NEXT: v_readlane_b32 s1, v0, 10 -; GCN-NEXT: v_readlane_b32 s2, v0, 11 -; GCN-NEXT: v_readlane_b32 s3, v0, 12 -; GCN-NEXT: v_readlane_b32 s4, v0, 13 -; GCN-NEXT: v_readlane_b32 s5, v0, 14 -; GCN-NEXT: v_readlane_b32 s6, v0, 15 -; GCN-NEXT: v_readlane_b32 s7, v0, 16 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[52:59] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 17 -; GCN-NEXT: v_readlane_b32 s1, v0, 18 -; GCN-NEXT: v_readlane_b32 s2, v0, 19 -; GCN-NEXT: v_readlane_b32 s3, v0, 20 -; GCN-NEXT: v_readlane_b32 s4, v0, 21 -; GCN-NEXT: v_readlane_b32 s5, v0, 22 -; GCN-NEXT: v_readlane_b32 s6, v0, 23 -; GCN-NEXT: v_readlane_b32 s7, v0, 24 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[44:51] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 25 -; GCN-NEXT: v_readlane_b32 s1, v0, 26 -; GCN-NEXT: v_readlane_b32 s2, v0, 27 -; GCN-NEXT: v_readlane_b32 s3, v0, 28 -; GCN-NEXT: v_readlane_b32 s4, v0, 29 -; GCN-NEXT: v_readlane_b32 s5, v0, 30 -; GCN-NEXT: v_readlane_b32 s6, v0, 31 -; GCN-NEXT: v_readlane_b32 s7, v0, 32 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[36:43] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 33 -; GCN-NEXT: v_readlane_b32 s1, v0, 34 -; GCN-NEXT: v_readlane_b32 s2, v0, 35 -; GCN-NEXT: v_readlane_b32 s3, v0, 36 -; GCN-NEXT: v_readlane_b32 s4, v0, 37 -; GCN-NEXT: v_readlane_b32 s5, v0, 38 -; GCN-NEXT: v_readlane_b32 s6, v0, 39 -; GCN-NEXT: v_readlane_b32 s7, v0, 40 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[24:31] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 41 -; GCN-NEXT: v_readlane_b32 s1, v0, 42 -; GCN-NEXT: v_readlane_b32 s2, 
v0, 43 -; GCN-NEXT: v_readlane_b32 s3, v0, 44 -; GCN-NEXT: v_readlane_b32 s4, v0, 45 -; GCN-NEXT: v_readlane_b32 s5, v0, 46 -; GCN-NEXT: v_readlane_b32 s6, v0, 47 -; GCN-NEXT: v_readlane_b32 s7, v0, 48 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[16:23] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 49 -; GCN-NEXT: v_readlane_b32 s1, v0, 50 -; GCN-NEXT: v_readlane_b32 s2, v0, 51 -; GCN-NEXT: v_readlane_b32 s3, v0, 52 -; GCN-NEXT: v_readlane_b32 s4, v0, 53 -; GCN-NEXT: v_readlane_b32 s5, v0, 54 -; GCN-NEXT: v_readlane_b32 s6, v0, 55 -; GCN-NEXT: v_readlane_b32 s7, v0, 56 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[8:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v2, 1 -; GCN-NEXT: v_readlane_b32 s1, v2, 2 -; GCN-NEXT: v_readlane_b32 s2, v2, 3 -; GCN-NEXT: v_readlane_b32 s3, v2, 4 -; GCN-NEXT: v_readlane_b32 s4, v2, 5 -; GCN-NEXT: v_readlane_b32 s5, v2, 6 -; GCN-NEXT: v_readlane_b32 s6, v2, 7 -; GCN-NEXT: v_readlane_b32 s7, v2, 8 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND @@ -448,191 +446,189 @@ ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 0 +; GCN-NEXT: v_writelane_b32 v0, s5, 1 +; GCN-NEXT: v_writelane_b32 v0, s6, 2 +; GCN-NEXT: v_writelane_b32 v0, s7, 3 +; GCN-NEXT: v_writelane_b32 v0, s8, 4 +; GCN-NEXT: v_writelane_b32 v0, s9, 5 +; GCN-NEXT: v_writelane_b32 v0, s10, 6 +; GCN-NEXT: v_writelane_b32 v0, s11, 7 +; GCN-NEXT: v_writelane_b32 v0, s12, 8 +; GCN-NEXT: v_writelane_b32 v0, s13, 9 +; GCN-NEXT: v_writelane_b32 v0, s14, 10 +; GCN-NEXT: v_writelane_b32 v0, s15, 11 +; GCN-NEXT: v_writelane_b32 v0, s16, 12 +; GCN-NEXT: v_writelane_b32 v0, s17, 13 +; GCN-NEXT: v_writelane_b32 v0, s18, 14 +; GCN-NEXT: v_writelane_b32 v0, s19, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 16 +; GCN-NEXT: v_writelane_b32 v0, s5, 17 +; GCN-NEXT: 
v_writelane_b32 v0, s6, 18 +; GCN-NEXT: v_writelane_b32 v0, s7, 19 +; GCN-NEXT: v_writelane_b32 v0, s8, 20 +; GCN-NEXT: v_writelane_b32 v0, s9, 21 +; GCN-NEXT: v_writelane_b32 v0, s10, 22 +; GCN-NEXT: v_writelane_b32 v0, s11, 23 +; GCN-NEXT: v_writelane_b32 v0, s12, 24 +; GCN-NEXT: v_writelane_b32 v0, s13, 25 +; GCN-NEXT: v_writelane_b32 v0, s14, 26 +; GCN-NEXT: v_writelane_b32 v0, s15, 27 +; GCN-NEXT: v_writelane_b32 v0, s16, 28 +; GCN-NEXT: v_writelane_b32 v0, s17, 29 +; GCN-NEXT: v_writelane_b32 v0, s18, 30 +; GCN-NEXT: v_writelane_b32 v0, s19, 31 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 32 +; GCN-NEXT: v_writelane_b32 v0, s5, 33 +; GCN-NEXT: v_writelane_b32 v0, s6, 34 +; GCN-NEXT: v_writelane_b32 v0, s7, 35 +; GCN-NEXT: v_writelane_b32 v0, s8, 36 +; GCN-NEXT: v_writelane_b32 v0, s9, 37 +; GCN-NEXT: v_writelane_b32 v0, s10, 38 +; GCN-NEXT: v_writelane_b32 v0, s11, 39 +; GCN-NEXT: v_writelane_b32 v0, s12, 40 +; GCN-NEXT: v_writelane_b32 v0, s13, 41 +; GCN-NEXT: v_writelane_b32 v0, s14, 42 +; GCN-NEXT: v_writelane_b32 v0, s15, 43 +; GCN-NEXT: v_writelane_b32 v0, s16, 44 +; GCN-NEXT: v_writelane_b32 v0, s17, 45 +; GCN-NEXT: v_writelane_b32 v0, s18, 46 +; GCN-NEXT: v_writelane_b32 v0, s19, 47 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[36:51] +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s4, 48 +; GCN-NEXT: v_writelane_b32 v0, s5, 49 +; GCN-NEXT: v_writelane_b32 v0, s6, 50 +; GCN-NEXT: v_writelane_b32 v0, s7, 51 +; GCN-NEXT: v_writelane_b32 v0, s8, 52 +; GCN-NEXT: v_writelane_b32 v0, s9, 53 +; GCN-NEXT: v_writelane_b32 v0, s10, 54 +; GCN-NEXT: v_writelane_b32 v0, s11, 55 +; GCN-NEXT: v_writelane_b32 v0, s12, 56 +; GCN-NEXT: v_writelane_b32 v0, s13, 57 +; GCN-NEXT: v_writelane_b32 v0, s14, 58 +; GCN-NEXT: v_writelane_b32 v0, s15, 59 +; GCN-NEXT: v_writelane_b32 v0, s16, 60 +; GCN-NEXT: v_writelane_b32 v0, s17, 61 +; GCN-NEXT: v_writelane_b32 v0, s18, 62 +; 
GCN-NEXT: v_writelane_b32 v0, s19, 63 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s4, 0 +; GCN-NEXT: v_writelane_b32 v1, s5, 1 +; GCN-NEXT: v_writelane_b32 v1, s6, 2 +; GCN-NEXT: v_writelane_b32 v1, s7, 3 +; GCN-NEXT: v_writelane_b32 v1, s8, 4 +; GCN-NEXT: v_writelane_b32 v1, s9, 5 +; GCN-NEXT: v_writelane_b32 v1, s10, 6 +; GCN-NEXT: v_writelane_b32 v1, s11, 7 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[2:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v1, s2, 8 +; GCN-NEXT: v_writelane_b32 v1, s3, 9 +; GCN-NEXT: s_mov_b32 s1, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-NEXT: v_writelane_b32 v0, s4, 1 -; GCN-NEXT: v_writelane_b32 v0, s5, 2 -; GCN-NEXT: v_writelane_b32 v0, s6, 3 -; GCN-NEXT: v_writelane_b32 v0, s7, 4 -; GCN-NEXT: v_writelane_b32 v0, s8, 5 -; GCN-NEXT: v_writelane_b32 v0, s9, 6 -; GCN-NEXT: v_writelane_b32 v0, s10, 7 -; GCN-NEXT: v_writelane_b32 v0, s11, 8 -; GCN-NEXT: v_writelane_b32 v0, s12, 9 -; GCN-NEXT: v_writelane_b32 v0, s13, 10 -; GCN-NEXT: v_writelane_b32 v0, s14, 11 -; GCN-NEXT: v_writelane_b32 v0, s15, 12 -; GCN-NEXT: v_writelane_b32 v0, s16, 13 -; GCN-NEXT: v_writelane_b32 v0, s17, 14 -; GCN-NEXT: v_writelane_b32 v0, s18, 15 -; GCN-NEXT: v_writelane_b32 v0, s19, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:15] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[16:31] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s0, 17 -; GCN-NEXT: v_writelane_b32 v0, s1, 18 -; GCN-NEXT: v_writelane_b32 v0, s2, 19 -; GCN-NEXT: v_writelane_b32 v0, s3, 20 -; GCN-NEXT: v_writelane_b32 v0, s4, 21 -; GCN-NEXT: v_writelane_b32 v0, s5, 22 -; GCN-NEXT: v_writelane_b32 v0, s6, 23 -; GCN-NEXT: v_writelane_b32 v0, s7, 24 -; GCN-NEXT: v_writelane_b32 v0, s8, 25 -; GCN-NEXT: v_writelane_b32 v0, s9, 26 -; GCN-NEXT: v_writelane_b32 v0, s10, 27 -; GCN-NEXT: v_writelane_b32 v0, s11, 28 -; GCN-NEXT: v_writelane_b32 v0, s12, 29 -; 
GCN-NEXT: v_writelane_b32 v0, s13, 30 -; GCN-NEXT: v_writelane_b32 v0, s14, 31 -; GCN-NEXT: v_writelane_b32 v0, s15, 32 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:7] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[8:9] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s10, 0 -; GCN-NEXT: v_readlane_b32 s11, v0, 0 -; GCN-NEXT: s_cmp_lg_u32 s11, s10 -; GCN-NEXT: v_writelane_b32 v0, s36, 33 -; GCN-NEXT: v_writelane_b32 v0, s37, 34 -; GCN-NEXT: v_writelane_b32 v0, s38, 35 -; GCN-NEXT: v_writelane_b32 v0, s39, 36 -; GCN-NEXT: v_writelane_b32 v0, s40, 37 -; GCN-NEXT: v_writelane_b32 v0, s41, 38 -; GCN-NEXT: v_writelane_b32 v0, s42, 39 -; GCN-NEXT: v_writelane_b32 v0, s43, 40 -; GCN-NEXT: v_writelane_b32 v0, s44, 41 -; GCN-NEXT: v_writelane_b32 v0, s45, 42 -; GCN-NEXT: v_writelane_b32 v0, s46, 43 -; GCN-NEXT: v_writelane_b32 v0, s47, 44 -; GCN-NEXT: v_writelane_b32 v0, s48, 45 -; GCN-NEXT: v_writelane_b32 v0, s49, 46 -; GCN-NEXT: v_writelane_b32 v0, s50, 47 -; GCN-NEXT: v_writelane_b32 v0, s51, 48 -; GCN-NEXT: v_writelane_b32 v0, s16, 49 -; GCN-NEXT: v_writelane_b32 v0, s17, 50 -; GCN-NEXT: v_writelane_b32 v0, s18, 51 -; GCN-NEXT: v_writelane_b32 v0, s19, 52 -; GCN-NEXT: v_writelane_b32 v0, s20, 53 -; GCN-NEXT: v_writelane_b32 v0, s21, 54 -; GCN-NEXT: v_writelane_b32 v0, s22, 55 -; GCN-NEXT: v_writelane_b32 v0, s23, 56 -; GCN-NEXT: v_writelane_b32 v0, s24, 57 -; GCN-NEXT: v_writelane_b32 v0, s25, 58 -; GCN-NEXT: v_writelane_b32 v0, s26, 59 -; GCN-NEXT: v_writelane_b32 v0, s27, 60 -; GCN-NEXT: v_writelane_b32 v0, s28, 61 -; GCN-NEXT: v_writelane_b32 v0, s29, 62 -; GCN-NEXT: v_writelane_b32 v0, s30, 63 -; GCN-NEXT: v_writelane_b32 v1, s31, 0 -; GCN-NEXT: v_writelane_b32 v1, s0, 1 -; GCN-NEXT: v_writelane_b32 v1, s1, 2 -; GCN-NEXT: v_writelane_b32 v1, s2, 3 -; GCN-NEXT: v_writelane_b32 v1, s3, 4 -; GCN-NEXT: v_writelane_b32 v1, s4, 5 -; GCN-NEXT: v_writelane_b32 v1, s5, 6 -; GCN-NEXT: v_writelane_b32 v1, s6, 7 -; GCN-NEXT: v_writelane_b32 v1, s7, 8 -; 
GCN-NEXT: v_writelane_b32 v1, s8, 9 -; GCN-NEXT: v_writelane_b32 v1, s9, 10 +; GCN-NEXT: s_cmp_lg_u32 s0, s1 ; GCN-NEXT: s_cbranch_scc1 BB1_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 1 -; GCN-NEXT: v_readlane_b32 s1, v0, 2 -; GCN-NEXT: v_readlane_b32 s2, v0, 3 -; GCN-NEXT: v_readlane_b32 s3, v0, 4 -; GCN-NEXT: v_readlane_b32 s4, v0, 5 -; GCN-NEXT: v_readlane_b32 s5, v0, 6 -; GCN-NEXT: v_readlane_b32 s6, v0, 7 -; GCN-NEXT: v_readlane_b32 s7, v0, 8 -; GCN-NEXT: v_readlane_b32 s8, v0, 9 -; GCN-NEXT: v_readlane_b32 s9, v0, 10 -; GCN-NEXT: v_readlane_b32 s10, v0, 11 -; GCN-NEXT: v_readlane_b32 s11, v0, 12 -; GCN-NEXT: v_readlane_b32 s12, v0, 13 -; GCN-NEXT: v_readlane_b32 s13, v0, 14 -; GCN-NEXT: v_readlane_b32 s14, v0, 15 -; GCN-NEXT: v_readlane_b32 s15, v0, 16 +; GCN-NEXT: v_readlane_b32 s16, v1, 8 +; GCN-NEXT: v_readlane_b32 s17, v1, 9 +; GCN-NEXT: v_readlane_b32 s20, v1, 0 +; GCN-NEXT: v_readlane_b32 s21, v1, 1 +; GCN-NEXT: v_readlane_b32 s22, v1, 2 +; GCN-NEXT: v_readlane_b32 s23, v1, 3 +; GCN-NEXT: v_readlane_b32 s24, v1, 4 +; GCN-NEXT: v_readlane_b32 s25, v1, 5 +; GCN-NEXT: v_readlane_b32 s26, v1, 6 +; GCN-NEXT: v_readlane_b32 s27, v1, 7 +; GCN-NEXT: v_readlane_b32 s36, v0, 32 +; GCN-NEXT: v_readlane_b32 s37, v0, 33 +; GCN-NEXT: v_readlane_b32 s38, v0, 34 +; GCN-NEXT: v_readlane_b32 s39, v0, 35 +; GCN-NEXT: v_readlane_b32 s40, v0, 36 +; GCN-NEXT: v_readlane_b32 s41, v0, 37 +; GCN-NEXT: v_readlane_b32 s42, v0, 38 +; GCN-NEXT: v_readlane_b32 s43, v0, 39 +; GCN-NEXT: v_readlane_b32 s44, v0, 40 +; GCN-NEXT: v_readlane_b32 s45, v0, 41 +; GCN-NEXT: v_readlane_b32 s46, v0, 42 +; GCN-NEXT: v_readlane_b32 s47, v0, 43 +; GCN-NEXT: v_readlane_b32 s48, v0, 44 +; GCN-NEXT: v_readlane_b32 s49, v0, 45 +; GCN-NEXT: v_readlane_b32 s50, v0, 46 +; GCN-NEXT: v_readlane_b32 s51, v0, 47 +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s2, v0, 2 +; GCN-NEXT: v_readlane_b32 s3, v0, 3 +; GCN-NEXT: 
v_readlane_b32 s4, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v0, 5 +; GCN-NEXT: v_readlane_b32 s6, v0, 6 +; GCN-NEXT: v_readlane_b32 s7, v0, 7 +; GCN-NEXT: v_readlane_b32 s8, v0, 8 +; GCN-NEXT: v_readlane_b32 s9, v0, 9 +; GCN-NEXT: v_readlane_b32 s10, v0, 10 +; GCN-NEXT: v_readlane_b32 s11, v0, 11 +; GCN-NEXT: v_readlane_b32 s12, v0, 12 +; GCN-NEXT: v_readlane_b32 s13, v0, 13 +; GCN-NEXT: v_readlane_b32 s14, v0, 14 +; GCN-NEXT: v_readlane_b32 s15, v0, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 33 -; GCN-NEXT: v_readlane_b32 s1, v0, 34 -; GCN-NEXT: v_readlane_b32 s2, v0, 35 -; GCN-NEXT: v_readlane_b32 s3, v0, 36 -; GCN-NEXT: v_readlane_b32 s4, v0, 37 -; GCN-NEXT: v_readlane_b32 s5, v0, 38 -; GCN-NEXT: v_readlane_b32 s6, v0, 39 -; GCN-NEXT: v_readlane_b32 s7, v0, 40 -; GCN-NEXT: v_readlane_b32 s8, v0, 41 -; GCN-NEXT: v_readlane_b32 s9, v0, 42 -; GCN-NEXT: v_readlane_b32 s10, v0, 43 -; GCN-NEXT: v_readlane_b32 s11, v0, 44 -; GCN-NEXT: v_readlane_b32 s12, v0, 45 -; GCN-NEXT: v_readlane_b32 s13, v0, 46 -; GCN-NEXT: v_readlane_b32 s14, v0, 47 -; GCN-NEXT: v_readlane_b32 s15, v0, 48 +; GCN-NEXT: v_readlane_b32 s0, v0, 16 +; GCN-NEXT: v_readlane_b32 s1, v0, 17 +; GCN-NEXT: v_readlane_b32 s2, v0, 18 +; GCN-NEXT: v_readlane_b32 s3, v0, 19 +; GCN-NEXT: v_readlane_b32 s4, v0, 20 +; GCN-NEXT: v_readlane_b32 s5, v0, 21 +; GCN-NEXT: v_readlane_b32 s6, v0, 22 +; GCN-NEXT: v_readlane_b32 s7, v0, 23 +; GCN-NEXT: v_readlane_b32 s8, v0, 24 +; GCN-NEXT: v_readlane_b32 s9, v0, 25 +; GCN-NEXT: v_readlane_b32 s10, v0, 26 +; GCN-NEXT: v_readlane_b32 s11, v0, 27 +; GCN-NEXT: v_readlane_b32 s12, v0, 28 +; GCN-NEXT: v_readlane_b32 s13, v0, 29 +; GCN-NEXT: v_readlane_b32 s14, v0, 30 +; GCN-NEXT: v_readlane_b32 s15, v0, 31 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 17 -; GCN-NEXT: v_readlane_b32 s1, v0, 18 -; GCN-NEXT: v_readlane_b32 s2, v0, 19 -; GCN-NEXT: 
v_readlane_b32 s3, v0, 20 -; GCN-NEXT: v_readlane_b32 s4, v0, 21 -; GCN-NEXT: v_readlane_b32 s5, v0, 22 -; GCN-NEXT: v_readlane_b32 s6, v0, 23 -; GCN-NEXT: v_readlane_b32 s7, v0, 24 -; GCN-NEXT: v_readlane_b32 s8, v0, 25 -; GCN-NEXT: v_readlane_b32 s9, v0, 26 -; GCN-NEXT: v_readlane_b32 s10, v0, 27 -; GCN-NEXT: v_readlane_b32 s11, v0, 28 -; GCN-NEXT: v_readlane_b32 s12, v0, 29 -; GCN-NEXT: v_readlane_b32 s13, v0, 30 -; GCN-NEXT: v_readlane_b32 s14, v0, 31 -; GCN-NEXT: v_readlane_b32 s15, v0, 32 +; GCN-NEXT: v_readlane_b32 s0, v0, 48 +; GCN-NEXT: v_readlane_b32 s1, v0, 49 +; GCN-NEXT: v_readlane_b32 s2, v0, 50 +; GCN-NEXT: v_readlane_b32 s3, v0, 51 +; GCN-NEXT: v_readlane_b32 s4, v0, 52 +; GCN-NEXT: v_readlane_b32 s5, v0, 53 +; GCN-NEXT: v_readlane_b32 s6, v0, 54 +; GCN-NEXT: v_readlane_b32 s7, v0, 55 +; GCN-NEXT: v_readlane_b32 s8, v0, 56 +; GCN-NEXT: v_readlane_b32 s9, v0, 57 +; GCN-NEXT: v_readlane_b32 s10, v0, 58 +; GCN-NEXT: v_readlane_b32 s11, v0, 59 +; GCN-NEXT: v_readlane_b32 s12, v0, 60 +; GCN-NEXT: v_readlane_b32 s13, v0, 61 +; GCN-NEXT: v_readlane_b32 s14, v0, 62 +; GCN-NEXT: v_readlane_b32 s15, v0, 63 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:15] +; GCN-NEXT: ; use s[36:51] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 1 -; GCN-NEXT: v_readlane_b32 s1, v1, 2 -; GCN-NEXT: v_readlane_b32 s2, v1, 3 -; GCN-NEXT: v_readlane_b32 s3, v1, 4 -; GCN-NEXT: v_readlane_b32 s4, v1, 5 -; GCN-NEXT: v_readlane_b32 s5, v1, 6 -; GCN-NEXT: v_readlane_b32 s6, v1, 7 -; GCN-NEXT: v_readlane_b32 s7, v1, 8 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ; use s[20:27] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 9 -; GCN-NEXT: v_readlane_b32 s1, v1, 10 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:1] +; GCN-NEXT: ; use s[16:17] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 49 -; GCN-NEXT: v_readlane_b32 s1, v0, 50 -; GCN-NEXT: v_readlane_b32 s2, v0, 51 -; GCN-NEXT: v_readlane_b32 s3, v0, 52 -; GCN-NEXT: v_readlane_b32 s4, 
v0, 53 -; GCN-NEXT: v_readlane_b32 s5, v0, 54 -; GCN-NEXT: v_readlane_b32 s6, v0, 55 -; GCN-NEXT: v_readlane_b32 s7, v0, 56 -; GCN-NEXT: v_readlane_b32 s8, v0, 57 -; GCN-NEXT: v_readlane_b32 s9, v0, 58 -; GCN-NEXT: v_readlane_b32 s10, v0, 59 -; GCN-NEXT: v_readlane_b32 s11, v0, 60 -; GCN-NEXT: v_readlane_b32 s12, v0, 61 -; GCN-NEXT: v_readlane_b32 s13, v0, 62 -; GCN-NEXT: v_readlane_b32 s14, v0, 63 -; GCN-NEXT: v_readlane_b32 s15, v1, 0 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND @@ -667,12 +663,12 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { ; GCN-LABEL: no_vgprs_last_sgpr_spill: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s56, SCRATCH_RSRC_DWORD0 -; GCN-NEXT: s_mov_b32 s57, SCRATCH_RSRC_DWORD1 -; GCN-NEXT: s_mov_b32 s58, -1 -; GCN-NEXT: s_mov_b32 s59, 0xe8f000 -; GCN-NEXT: s_add_u32 s56, s56, s3 -; GCN-NEXT: s_addc_u32 s57, s57, 0 +; GCN-NEXT: s_mov_b32 s52, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s53, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s54, -1 +; GCN-NEXT: s_mov_b32 s55, 0xe8f000 +; GCN-NEXT: s_add_u32 s52, s52, s3 +; GCN-NEXT: s_addc_u32 s53, s53, 0 ; GCN-NEXT: s_load_dword s0, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND @@ -689,180 +685,176 @@ ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 0 +; GCN-NEXT: v_writelane_b32 v31, s5, 1 +; GCN-NEXT: v_writelane_b32 v31, s6, 2 +; GCN-NEXT: v_writelane_b32 v31, s7, 3 +; GCN-NEXT: v_writelane_b32 v31, s8, 4 +; GCN-NEXT: v_writelane_b32 v31, s9, 5 +; GCN-NEXT: v_writelane_b32 v31, s10, 6 +; GCN-NEXT: v_writelane_b32 v31, s11, 7 +; GCN-NEXT: v_writelane_b32 v31, s12, 8 +; GCN-NEXT: v_writelane_b32 v31, s13, 9 +; GCN-NEXT: v_writelane_b32 v31, s14, 10 +; GCN-NEXT: v_writelane_b32 v31, s15, 11 +; GCN-NEXT: v_writelane_b32 v31, s16, 12 +; GCN-NEXT: v_writelane_b32 v31, s17, 13 +; GCN-NEXT: v_writelane_b32 v31, s18, 14 +; GCN-NEXT: v_writelane_b32 v31, s19, 15 ; 
GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[36:51] +; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: v_writelane_b32 v31, s0, 0 -; GCN-NEXT: v_writelane_b32 v31, s4, 1 -; GCN-NEXT: v_writelane_b32 v31, s5, 2 -; GCN-NEXT: v_writelane_b32 v31, s6, 3 -; GCN-NEXT: v_writelane_b32 v31, s7, 4 -; GCN-NEXT: v_writelane_b32 v31, s8, 5 -; GCN-NEXT: v_writelane_b32 v31, s9, 6 -; GCN-NEXT: v_writelane_b32 v31, s10, 7 -; GCN-NEXT: v_writelane_b32 v31, s11, 8 -; GCN-NEXT: v_writelane_b32 v31, s12, 9 -; GCN-NEXT: v_writelane_b32 v31, s13, 10 -; GCN-NEXT: v_writelane_b32 v31, s14, 11 -; GCN-NEXT: v_writelane_b32 v31, s15, 12 -; GCN-NEXT: v_writelane_b32 v31, s16, 13 -; GCN-NEXT: v_writelane_b32 v31, s17, 14 -; GCN-NEXT: v_writelane_b32 v31, s18, 15 -; GCN-NEXT: v_writelane_b32 v31, s19, 16 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:15] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[16:31] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[34:35] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s33, 0 -; GCN-NEXT: v_readlane_b32 s52, v31, 0 -; GCN-NEXT: s_cmp_lg_u32 s52, s33 -; GCN-NEXT: v_writelane_b32 v31, s36, 17 -; GCN-NEXT: v_writelane_b32 v31, s37, 18 -; GCN-NEXT: v_writelane_b32 v31, s38, 19 -; GCN-NEXT: v_writelane_b32 v31, s39, 20 -; GCN-NEXT: v_writelane_b32 v31, s40, 21 -; GCN-NEXT: v_writelane_b32 v31, s41, 22 -; GCN-NEXT: v_writelane_b32 v31, s42, 23 -; GCN-NEXT: v_writelane_b32 v31, s43, 24 -; GCN-NEXT: v_writelane_b32 v31, s44, 25 -; GCN-NEXT: v_writelane_b32 v31, s45, 26 -; GCN-NEXT: v_writelane_b32 v31, s46, 27 -; GCN-NEXT: v_writelane_b32 v31, s47, 28 -; GCN-NEXT: v_writelane_b32 v31, s48, 29 -; GCN-NEXT: v_writelane_b32 v31, s49, 30 -; GCN-NEXT: v_writelane_b32 v31, s50, 31 -; GCN-NEXT: v_writelane_b32 v31, s51, 32 -; GCN-NEXT: v_writelane_b32 v31, s0, 33 -; GCN-NEXT: v_writelane_b32 v31, s1, 34 -; GCN-NEXT: v_writelane_b32 v31, s2, 35 -; GCN-NEXT: v_writelane_b32 v31, s3, 36 
-; GCN-NEXT: v_writelane_b32 v31, s4, 37 -; GCN-NEXT: v_writelane_b32 v31, s5, 38 -; GCN-NEXT: v_writelane_b32 v31, s6, 39 -; GCN-NEXT: v_writelane_b32 v31, s7, 40 -; GCN-NEXT: v_writelane_b32 v31, s8, 41 -; GCN-NEXT: v_writelane_b32 v31, s9, 42 -; GCN-NEXT: v_writelane_b32 v31, s10, 43 -; GCN-NEXT: v_writelane_b32 v31, s11, 44 -; GCN-NEXT: v_writelane_b32 v31, s12, 45 -; GCN-NEXT: v_writelane_b32 v31, s13, 46 -; GCN-NEXT: v_writelane_b32 v31, s14, 47 -; GCN-NEXT: v_writelane_b32 v31, s15, 48 -; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 -; GCN-NEXT: v_writelane_b32 v0, s16, 0 -; GCN-NEXT: v_writelane_b32 v0, s17, 1 -; GCN-NEXT: v_writelane_b32 v0, s18, 2 -; GCN-NEXT: v_writelane_b32 v0, s19, 3 -; GCN-NEXT: v_writelane_b32 v0, s20, 4 -; GCN-NEXT: v_writelane_b32 v0, s21, 5 -; GCN-NEXT: v_writelane_b32 v0, s22, 6 -; GCN-NEXT: v_writelane_b32 v0, s23, 7 -; GCN-NEXT: v_writelane_b32 v0, s24, 8 -; GCN-NEXT: v_writelane_b32 v0, s25, 9 -; GCN-NEXT: v_writelane_b32 v0, s26, 10 -; GCN-NEXT: v_writelane_b32 v0, s27, 11 -; GCN-NEXT: v_writelane_b32 v0, s28, 12 -; GCN-NEXT: v_writelane_b32 v0, s29, 13 -; GCN-NEXT: v_writelane_b32 v0, s30, 14 -; GCN-NEXT: v_writelane_b32 v0, s31, 15 -; GCN-NEXT: s_mov_b64 s[16:17], exec -; GCN-NEXT: s_mov_b64 exec, 0xffff -; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: v_writelane_b32 v31, s34, 49 -; GCN-NEXT: v_writelane_b32 v31, s35, 50 -; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 -; GCN-NEXT: s_cbranch_scc1 BB2_2 -; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v31, 1 -; GCN-NEXT: v_readlane_b32 s1, v31, 2 -; GCN-NEXT: v_readlane_b32 s2, v31, 3 -; GCN-NEXT: v_readlane_b32 s3, v31, 4 -; GCN-NEXT: v_readlane_b32 s4, v31, 5 -; GCN-NEXT: v_readlane_b32 s5, v31, 6 -; GCN-NEXT: v_readlane_b32 s6, v31, 7 -; GCN-NEXT: v_readlane_b32 s7, v31, 8 -; GCN-NEXT: v_readlane_b32 s8, v31, 9 -; GCN-NEXT: v_readlane_b32 s9, v31, 10 -; GCN-NEXT: 
v_readlane_b32 s10, v31, 11 -; GCN-NEXT: v_readlane_b32 s11, v31, 12 -; GCN-NEXT: v_readlane_b32 s12, v31, 13 -; GCN-NEXT: v_readlane_b32 s13, v31, 14 -; GCN-NEXT: v_readlane_b32 s14, v31, 15 -; GCN-NEXT: v_readlane_b32 s15, v31, 16 +; GCN-NEXT: v_writelane_b32 v31, s4, 16 +; GCN-NEXT: v_writelane_b32 v31, s5, 17 +; GCN-NEXT: v_writelane_b32 v31, s6, 18 +; GCN-NEXT: v_writelane_b32 v31, s7, 19 +; GCN-NEXT: v_writelane_b32 v31, s8, 20 +; GCN-NEXT: v_writelane_b32 v31, s9, 21 +; GCN-NEXT: v_writelane_b32 v31, s10, 22 +; GCN-NEXT: v_writelane_b32 v31, s11, 23 +; GCN-NEXT: v_writelane_b32 v31, s12, 24 +; GCN-NEXT: v_writelane_b32 v31, s13, 25 +; GCN-NEXT: v_writelane_b32 v31, s14, 26 +; GCN-NEXT: v_writelane_b32 v31, s15, 27 +; GCN-NEXT: v_writelane_b32 v31, s16, 28 +; GCN-NEXT: v_writelane_b32 v31, s17, 29 +; GCN-NEXT: v_writelane_b32 v31, s18, 30 +; GCN-NEXT: v_writelane_b32 v31, s19, 31 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:15] +; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 17 -; GCN-NEXT: v_readlane_b32 s1, v31, 18 -; GCN-NEXT: v_readlane_b32 s2, v31, 19 -; GCN-NEXT: v_readlane_b32 s3, v31, 20 -; GCN-NEXT: v_readlane_b32 s4, v31, 21 -; GCN-NEXT: v_readlane_b32 s5, v31, 22 -; GCN-NEXT: v_readlane_b32 s6, v31, 23 -; GCN-NEXT: v_readlane_b32 s7, v31, 24 -; GCN-NEXT: v_readlane_b32 s8, v31, 25 -; GCN-NEXT: v_readlane_b32 s9, v31, 26 -; GCN-NEXT: v_readlane_b32 s10, v31, 27 -; GCN-NEXT: v_readlane_b32 s11, v31, 28 -; GCN-NEXT: v_readlane_b32 s12, v31, 29 -; GCN-NEXT: v_readlane_b32 s13, v31, 30 -; GCN-NEXT: v_readlane_b32 s14, v31, 31 -; GCN-NEXT: v_readlane_b32 s15, v31, 32 +; GCN-NEXT: v_writelane_b32 v31, s4, 32 +; GCN-NEXT: v_writelane_b32 v31, s5, 33 +; GCN-NEXT: v_writelane_b32 v31, s6, 34 +; GCN-NEXT: v_writelane_b32 v31, s7, 35 +; GCN-NEXT: v_writelane_b32 v31, s8, 36 +; GCN-NEXT: v_writelane_b32 v31, s9, 37 +; GCN-NEXT: v_writelane_b32 v31, s10, 38 +; GCN-NEXT: v_writelane_b32 v31, s11, 39 +; GCN-NEXT: 
v_writelane_b32 v31, s12, 40 +; GCN-NEXT: v_writelane_b32 v31, s13, 41 +; GCN-NEXT: v_writelane_b32 v31, s14, 42 +; GCN-NEXT: v_writelane_b32 v31, s15, 43 +; GCN-NEXT: v_writelane_b32 v31, s16, 44 +; GCN-NEXT: v_writelane_b32 v31, s17, 45 +; GCN-NEXT: v_writelane_b32 v31, s18, 46 +; GCN-NEXT: v_writelane_b32 v31, s19, 47 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:15] +; GCN-NEXT: ; def s[4:19] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v31, s4, 48 +; GCN-NEXT: v_writelane_b32 v31, s5, 49 +; GCN-NEXT: v_writelane_b32 v31, s6, 50 +; GCN-NEXT: v_writelane_b32 v31, s7, 51 +; GCN-NEXT: v_writelane_b32 v31, s8, 52 +; GCN-NEXT: v_writelane_b32 v31, s9, 53 +; GCN-NEXT: v_writelane_b32 v31, s10, 54 +; GCN-NEXT: v_writelane_b32 v31, s11, 55 +; GCN-NEXT: v_writelane_b32 v31, s12, 56 +; GCN-NEXT: v_writelane_b32 v31, s13, 57 +; GCN-NEXT: v_writelane_b32 v31, s14, 58 +; GCN-NEXT: v_writelane_b32 v31, s15, 59 +; GCN-NEXT: v_writelane_b32 v31, s16, 60 +; GCN-NEXT: v_writelane_b32 v31, s17, 61 +; GCN-NEXT: v_writelane_b32 v31, s18, 62 +; GCN-NEXT: v_writelane_b32 v31, s19, 63 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[2:3] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s2, 0 +; GCN-NEXT: v_writelane_b32 v0, s3, 1 +; GCN-NEXT: s_mov_b64 s[2:3], exec +; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_store_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[2:3] +; GCN-NEXT: s_mov_b32 s1, 0 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: s_cmp_lg_u32 s0, s1 +; GCN-NEXT: s_cbranch_scc1 BB2_2 +; GCN-NEXT: ; %bb.1: ; %bb0 +; GCN-NEXT: v_readlane_b32 s36, v31, 32 +; GCN-NEXT: v_readlane_b32 s37, v31, 33 +; GCN-NEXT: v_readlane_b32 s38, v31, 34 +; GCN-NEXT: v_readlane_b32 s39, v31, 35 +; GCN-NEXT: v_readlane_b32 s40, v31, 36 +; GCN-NEXT: v_readlane_b32 s41, v31, 37 +; GCN-NEXT: v_readlane_b32 s42, v31, 38 +; GCN-NEXT: v_readlane_b32 s43, v31, 39 +; GCN-NEXT: v_readlane_b32 s44, v31, 40 +; GCN-NEXT: v_readlane_b32 s45, v31, 41 
+; GCN-NEXT: v_readlane_b32 s46, v31, 42 +; GCN-NEXT: v_readlane_b32 s47, v31, 43 +; GCN-NEXT: v_readlane_b32 s48, v31, 44 +; GCN-NEXT: v_readlane_b32 s49, v31, 45 +; GCN-NEXT: v_readlane_b32 s50, v31, 46 +; GCN-NEXT: v_readlane_b32 s51, v31, 47 +; GCN-NEXT: v_readlane_b32 s0, v31, 16 +; GCN-NEXT: v_readlane_b32 s1, v31, 17 +; GCN-NEXT: v_readlane_b32 s2, v31, 18 +; GCN-NEXT: v_readlane_b32 s3, v31, 19 +; GCN-NEXT: v_readlane_b32 s4, v31, 20 +; GCN-NEXT: v_readlane_b32 s5, v31, 21 +; GCN-NEXT: v_readlane_b32 s6, v31, 22 +; GCN-NEXT: v_readlane_b32 s7, v31, 23 +; GCN-NEXT: v_readlane_b32 s8, v31, 24 +; GCN-NEXT: v_readlane_b32 s9, v31, 25 +; GCN-NEXT: v_readlane_b32 s10, v31, 26 +; GCN-NEXT: v_readlane_b32 s11, v31, 27 +; GCN-NEXT: v_readlane_b32 s12, v31, 28 +; GCN-NEXT: v_readlane_b32 s13, v31, 29 +; GCN-NEXT: v_readlane_b32 s14, v31, 30 +; GCN-NEXT: v_readlane_b32 s15, v31, 31 +; GCN-NEXT: v_readlane_b32 s16, v31, 0 +; GCN-NEXT: v_readlane_b32 s17, v31, 1 +; GCN-NEXT: v_readlane_b32 s18, v31, 2 +; GCN-NEXT: v_readlane_b32 s19, v31, 3 +; GCN-NEXT: v_readlane_b32 s20, v31, 4 +; GCN-NEXT: v_readlane_b32 s21, v31, 5 +; GCN-NEXT: v_readlane_b32 s22, v31, 6 +; GCN-NEXT: v_readlane_b32 s23, v31, 7 +; GCN-NEXT: v_readlane_b32 s24, v31, 8 +; GCN-NEXT: v_readlane_b32 s25, v31, 9 +; GCN-NEXT: v_readlane_b32 s26, v31, 10 +; GCN-NEXT: v_readlane_b32 s27, v31, 11 +; GCN-NEXT: v_readlane_b32 s28, v31, 12 +; GCN-NEXT: v_readlane_b32 s29, v31, 13 +; GCN-NEXT: v_readlane_b32 s30, v31, 14 +; GCN-NEXT: v_readlane_b32 s31, v31, 15 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[16:31] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 33 -; GCN-NEXT: v_readlane_b32 s1, v31, 34 -; GCN-NEXT: v_readlane_b32 s2, v31, 35 -; GCN-NEXT: v_readlane_b32 s3, v31, 36 -; GCN-NEXT: v_readlane_b32 s4, v31, 37 -; GCN-NEXT: v_readlane_b32 s5, v31, 38 -; GCN-NEXT: v_readlane_b32 s6, v31, 39 -; GCN-NEXT: v_readlane_b32 s7, v31, 40 -; GCN-NEXT: v_readlane_b32 s8, v31, 41 -; GCN-NEXT: 
v_readlane_b32 s9, v31, 42 -; GCN-NEXT: v_readlane_b32 s10, v31, 43 -; GCN-NEXT: v_readlane_b32 s11, v31, 44 -; GCN-NEXT: v_readlane_b32 s12, v31, 45 -; GCN-NEXT: v_readlane_b32 s13, v31, 46 -; GCN-NEXT: v_readlane_b32 s14, v31, 47 -; GCN-NEXT: v_readlane_b32 s15, v31, 48 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s4, v31, 48 +; GCN-NEXT: v_readlane_b32 s5, v31, 49 +; GCN-NEXT: v_readlane_b32 s6, v31, 50 +; GCN-NEXT: v_readlane_b32 s7, v31, 51 +; GCN-NEXT: v_readlane_b32 s8, v31, 52 +; GCN-NEXT: v_readlane_b32 s9, v31, 53 +; GCN-NEXT: v_readlane_b32 s10, v31, 54 +; GCN-NEXT: v_readlane_b32 s11, v31, 55 +; GCN-NEXT: v_readlane_b32 s12, v31, 56 +; GCN-NEXT: v_readlane_b32 s13, v31, 57 +; GCN-NEXT: v_readlane_b32 s14, v31, 58 +; GCN-NEXT: v_readlane_b32 s15, v31, 59 +; GCN-NEXT: v_readlane_b32 s16, v31, 60 +; GCN-NEXT: v_readlane_b32 s17, v31, 61 +; GCN-NEXT: v_readlane_b32 s18, v31, 62 +; GCN-NEXT: v_readlane_b32 s19, v31, 63 ; GCN-NEXT: s_mov_b64 s[0:1], exec -; GCN-NEXT: s_mov_b64 exec, 0xffff -; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, 3 +; GCN-NEXT: buffer_load_dword v0, off, s[52:55], 0 offset:4 ; 4-byte Folded Reload ; GCN-NEXT: s_mov_b64 exec, s[0:1] ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_readlane_b32 s0, v0, 0 ; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_readlane_b32 s2, v0, 2 -; GCN-NEXT: v_readlane_b32 s3, v0, 3 -; GCN-NEXT: v_readlane_b32 s4, v0, 4 -; GCN-NEXT: v_readlane_b32 s5, v0, 5 -; GCN-NEXT: v_readlane_b32 s6, v0, 6 -; GCN-NEXT: v_readlane_b32 s7, v0, 7 -; GCN-NEXT: v_readlane_b32 s8, v0, 8 -; GCN-NEXT: v_readlane_b32 s9, v0, 9 -; GCN-NEXT: v_readlane_b32 s10, v0, 10 -; GCN-NEXT: v_readlane_b32 s11, v0, 11 -; GCN-NEXT: v_readlane_b32 s12, v0, 12 -; GCN-NEXT: v_readlane_b32 s13, v0, 13 -; GCN-NEXT: v_readlane_b32 s14, v0, 14 -; GCN-NEXT: v_readlane_b32 s15, v0, 15 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[0:15] 
+; GCN-NEXT: ; use s[36:51] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[4:19] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 49 -; GCN-NEXT: v_readlane_b32 s1, v31, 50 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:1] ; GCN-NEXT: ;;#ASMEND Index: llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll +++ llvm/test/CodeGen/AMDGPU/reserve-vgpr-for-sgpr-spill.ll @@ -11,8 +11,8 @@ ; GCN: v_writelane_b32 v255, s30, 0 ; GCN: v_writelane_b32 v255, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: v_readlane_b32 s4, v255, 0 -; GCN: v_readlane_b32 s5, v255, 1 +; GCN: v_readlane_b32 s30, v255, 0 +; GCN: v_readlane_b32 s31, v255, 1 ; GCN: v_readlane_b32 s33, v255, 2 ; GCN: ; NumVgprs: 256 @@ -57,8 +57,8 @@ ; GCN: v_writelane_b32 v254, s30, 0 ; GCN: v_writelane_b32 v254, s31, 1 ; GCN: s_swappc_b64 s[30:31], s[4:5] -; GCN: v_readlane_b32 s4, v254, 0 -; GCN: v_readlane_b32 s5, v254, 1 +; GCN: v_readlane_b32 s30, v254, 0 +; GCN: v_readlane_b32 s31, v254, 1 ; GCN: v_readlane_b32 s33, v254, 2 define void @reserve_lowest_available_vgpr() #0 { Index: llvm/test/CodeGen/AMDGPU/spill-agpr.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/spill-agpr.mir +++ llvm/test/CodeGen/AMDGPU/spill-agpr.mir @@ -13,25 +13,25 @@ ; SPILLED: bb.0: ; SPILLED: successors: %bb.1(0x80000000) ; SPILLED: S_NOP 0, implicit-def renamable $agpr0 - ; SPILLED: S_NOP 0, implicit-def renamable $agpr1 + ; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) + ; SPILLED: S_NOP 0, implicit-def renamable $agpr0 ; SPILLED: SI_SPILL_A32_SAVE killed $agpr0, %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) - ; SPILLED: SI_SPILL_A32_SAVE killed $agpr1, %stack.1, 
$sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (store 4 into %stack.1, addrspace 5) ; SPILLED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: S_NOP 1 ; SPILLED: bb.2: - ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0, implicit renamable $agpr1 + ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.1, addrspace 5) + ; SPILLED: $agpr1 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1 ; EXPANDED-LABEL: name: spill_restore_agpr32 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) ; EXPANDED: liveins: $vgpr0, $vgpr1 ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0 - ; EXPANDED: S_NOP 0, implicit-def renamable $agpr1 ; EXPANDED: $vgpr0 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec - ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr1, implicit $exec + ; EXPANDED: S_NOP 0, implicit-def renamable $agpr0 + ; EXPANDED: $vgpr1 = V_ACCVGPR_READ_B32 killed $agpr0, implicit $exec ; EXPANDED: S_CBRANCH_SCC1 %bb.1, implicit undef $scc ; EXPANDED: bb.1: ; EXPANDED: successors: %bb.2(0x80000000) @@ -41,7 +41,7 @@ ; EXPANDED: liveins: $vgpr0, $vgpr1 ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec ; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec - ; EXPANDED: S_NOP 0, implicit renamable $agpr0, implicit renamable $agpr1 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0, implicit killed renamable $agpr1 bb.0: S_NOP 0, implicit-def %0:agpr_32 
S_NOP 0, implicit-def %1:agpr_32 @@ -72,7 +72,7 @@ ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1 = SI_SPILL_A64_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 8 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1 ; EXPANDED-LABEL: name: spill_restore_agpr64 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -89,7 +89,7 @@ ; EXPANDED: liveins: $vgpr0, $vgpr1 ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1 ; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1 bb.0: S_NOP 0, implicit-def %0:areg_64 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -118,6 +118,7 @@ ; SPILLED: bb.1: ; SPILLED: successors: %bb.2(0x80000000) ; SPILLED: bb.2: + ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) ; SPILLED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; SPILLED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; SPILLED: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 @@ -134,8 +135,7 @@ ; SPILLED: S_NOP 0, implicit undef $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; SPILLED: S_NOP 0, implicit undef $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247 ; SPILLED: S_NOP 0, implicit 
undef $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255 - ; SPILLED: $agpr0 = SI_SPILL_A32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0 ; EXPANDED-LABEL: name: spill_restore_agpr32_used_all_vgprs ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -149,6 +149,8 @@ ; EXPANDED: bb.1: ; EXPANDED: successors: %bb.2(0x80000000) ; EXPANDED: bb.2: + ; EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec ; EXPANDED: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 ; EXPANDED: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47 ; EXPANDED: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63 @@ -165,9 +167,7 @@ ; EXPANDED: S_NOP 0, implicit undef $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239 ; EXPANDED: S_NOP 0, implicit undef $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247 ; EXPANDED: S_NOP 0, implicit undef $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255 - ; EXPANDED: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) - ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 killed $vgpr0, implicit $exec - ; EXPANDED: S_NOP 0, implicit renamable $agpr0 + ; EXPANDED: S_NOP 
0, implicit killed renamable $agpr0 bb.0: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, 
$vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255 @@ -214,7 +214,7 @@ ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2 = SI_SPILL_A96_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 12 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2 ; EXPANDED-LABEL: name: spill_restore_agpr96 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -233,7 +233,7 @@ ; EXPANDED: $agpr0 = V_ACCVGPR_WRITE_B32 $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 ; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2 ; EXPANDED: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2 bb.0: S_NOP 0, implicit-def %0:areg_96 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -263,7 +263,7 @@ ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 16 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3 ; EXPANDED-LABEL: name: spill_restore_agpr128 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -284,7 +284,7 @@ ; EXPANDED: $agpr1 = V_ACCVGPR_WRITE_B32 $vgpr1, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; EXPANDED: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 ; EXPANDED: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3 bb.0: S_NOP 0, 
implicit-def %0:areg_128 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -314,7 +314,7 @@ ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4 = SI_SPILL_A160_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 20 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED-LABEL: name: spill_restore_agpr160 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -337,7 +337,7 @@ ; EXPANDED: $agpr2 = V_ACCVGPR_WRITE_B32 $vgpr2, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 ; EXPANDED: $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4 bb.0: S_NOP 0, implicit-def %0:areg_160 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -367,7 +367,7 @@ ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = SI_SPILL_A192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED-LABEL: name: spill_restore_agpr192 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -392,7 +392,7 @@ ; EXPANDED: $agpr3 = V_ACCVGPR_WRITE_B32 $vgpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED: $agpr4 = V_ACCVGPR_WRITE_B32 $vgpr4, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 ; EXPANDED: $agpr5 = V_ACCVGPR_WRITE_B32 $vgpr5, implicit $exec, implicit-def 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 bb.0: S_NOP 0, implicit-def %0:areg_192 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -422,7 +422,7 @@ ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = SI_SPILL_A256_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 32 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; SPILLED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED-LABEL: name: spill_restore_agpr256 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -451,7 +451,7 @@ ; EXPANDED: $agpr5 = V_ACCVGPR_WRITE_B32 $vgpr5, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED: $agpr6 = V_ACCVGPR_WRITE_B32 $vgpr6, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 ; EXPANDED: $agpr7 = V_ACCVGPR_WRITE_B32 $vgpr7, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 bb.0: S_NOP 0, implicit-def %0:areg_256 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -481,7 +481,7 @@ ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = SI_SPILL_A512_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 64 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; SPILLED: S_NOP 0, implicit killed renamable 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED-LABEL: name: spill_restore_agpr512 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -526,7 +526,7 @@ ; EXPANDED: $agpr13 = V_ACCVGPR_WRITE_B32 $vgpr13, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED: $agpr14 = V_ACCVGPR_WRITE_B32 $vgpr14, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 ; EXPANDED: $agpr15 = V_ACCVGPR_WRITE_B32 $vgpr15, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 bb.0: S_NOP 0, implicit-def %0:areg_512 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -556,7 +556,7 @@ ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = SI_SPILL_A1024_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 128 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; SPILLED: S_NOP 0, implicit killed renamable 
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED-LABEL: name: spill_restore_agpr1024 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -633,7 +633,7 @@ ; EXPANDED: $agpr29 = V_ACCVGPR_WRITE_B32 $vgpr29, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED: $agpr30 = V_ACCVGPR_WRITE_B32 $vgpr30, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 ; EXPANDED: $agpr31 = V_ACCVGPR_WRITE_B32 $vgpr31, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 - ; EXPANDED: S_NOP 0, implicit renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 + ; EXPANDED: S_NOP 0, implicit killed renamable $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 bb.0: S_NOP 0, implicit-def %0:areg_1024 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 Index: llvm/test/CodeGen/AMDGPU/spill-m0.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/spill-m0.ll +++ 
llvm/test/CodeGen/AMDGPU/spill-m0.ll @@ -1,28 +1,32 @@ -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVGPR -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s -; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=TOVMEM -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVGPR -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=1 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVGPR -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVMEM -check-prefix=GCN %s +; RUN: llc -O0 -amdgpu-spill-sgpr-to-vgpr=0 -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=TOVMEM -check-prefix=GCN %s ; XXX - Why does it like to use vcc? 
; GCN-LABEL: {{^}}spill_m0: -; GCN-DAG: s_cmp_lg_u32 +; GCN: #ASMSTART +; GCN-NEXT: s_mov_b32 m0, 0 +; GCN-NEXT: #ASMEND +; GCN-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 -; TOVGPR-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 -; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 2 +; TOVGPR: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], [[M0_LANE:[0-9]+]] -; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0 -; TOVMEM-DAG: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0 -; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 ; 4-byte Folded Spill +; TOVMEM: v_writelane_b32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]], 0 +; TOVMEM: s_mov_b32 [[COPY_EXEC_LO:s[0-9]+]], exec_lo +; TOVMEM: s_mov_b32 exec_lo, 1 +; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; 4-byte Folded Spill +; TOVMEM: s_mov_b32 exec_lo, [[COPY_EXEC_LO]] ; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]] ; GCN: [[ENDIF]]: -; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], 2 +; TOVGPR: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[SPILL_VREG]], [[M0_LANE]] ; TOVGPR: s_mov_b32 m0, [[M0_RESTORE]] -; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 ; 4-byte Folded Reload +; TOVMEM: buffer_load_dword [[RELOAD_VREG:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:4 ; 4-byte Folded Reload ; TOVMEM: s_waitcnt vmcnt(0) ; TOVMEM: v_readlane_b32 [[M0_RESTORE:s[0-9]+]], [[RELOAD_VREG]], 0 ; TOVMEM: s_mov_b32 m0, [[M0_RESTORE]] @@ -48,8 +52,6 @@ ; m0 is killed, so it isn't necessary during the entry block spill to preserve it ; GCN-LABEL: {{^}}spill_kill_m0_lds: -; GCN: s_mov_b32 m0, s6 -; GCN: v_interp_mov_f32 ; GCN-NOT: v_readlane_b32 m0 ; GCN-NOT: s_buffer_store_dword m0 @@ -79,10 +81,11 @@ ; Force save and restore of m0 during SMEM spill ; GCN-LABEL: {{^}}m0_unavailable_spill: +; GCN: s_load_dword [[REG0:s[0-9]+]], s[0:1], {{0x[0-9]+}} ; GCN: ; def m0, 1 -; GCN: s_mov_b32 m0, s0 +; GCN: 
s_mov_b32 m0, [[REG0]] ; GCN: v_interp_mov_f32 ; GCN: ; clobber m0 @@ -124,16 +127,17 @@ } ; GCN-LABEL: {{^}}restore_m0_lds: -; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] -; TOSMEM: s_cmp_eq_u32 ; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it ; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x100 -; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill +; TOSMEM: s_add_u32 m0, s3, {{0x[0-9]+}} +; TOSMEM: s_buffer_store_dword s1, s[88:91], m0 ; 4-byte Folded Spill ; FIXME-TOSMEM-NOT: m0 -; TOSMEM: s_add_u32 m0, s3, 0x200 +; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] +; TOSMEM: s_add_u32 m0, s3, {{0x[0-9]+}} +; TOSMEM: s_waitcnt lgkmcnt(0) ; TOSMEM: s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill ; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_cmp_eq_u32 ; TOSMEM: s_cbranch_scc1 ; TOSMEM: s_mov_b32 m0, -1 @@ -150,6 +154,13 @@ ; TOSMEM: s_add_u32 m0, s3, 0x100 ; TOSMEM: s_buffer_load_dword s2, s[88:91], m0 ; 4-byte Folded Reload ; FIXME-TOSMEM-NOT: m0 + +; TOSMEM: s_mov_b32 [[REG1:s[0-9]+]], m0 +; TOSMEM: s_add_u32 m0, s3, 0x100 +; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload +; TOSMEM: s_mov_b32 m0, [[REG1]] +; TOSMEM: s_mov_b32 m0, -1 + ; TOSMEM: s_waitcnt lgkmcnt(0) ; TOSMEM-NOT: m0 ; TOSMEM: s_mov_b32 m0, s2 Index: llvm/test/CodeGen/AMDGPU/spill192.mir =================================================================== --- llvm/test/CodeGen/AMDGPU/spill192.mir +++ llvm/test/CodeGen/AMDGPU/spill192.mir @@ -24,7 +24,7 @@ ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 = SI_SPILL_S192_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 24 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; SPILLED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 ; 
EXPANDED-LABEL: name: spill_restore_sgpr192 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -49,7 +49,7 @@ ; EXPANDED: $sgpr7 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 3 ; EXPANDED: $sgpr8 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 4 ; EXPANDED: $sgpr9 = V_READLANE_B32_gfx6_gfx7 $vgpr0, 5 - ; EXPANDED: S_NOP 0, implicit renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 + ; EXPANDED: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9 bb.0: S_NOP 0, implicit-def %0:sgpr_192 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 @@ -79,7 +79,7 @@ ; SPILLED: S_NOP 1 ; SPILLED: bb.2: ; SPILLED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) - ; SPILLED: S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; SPILLED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 ; EXPANDED-LABEL: name: spill_restore_vgpr192 ; EXPANDED: bb.0: ; EXPANDED: successors: %bb.1(0x80000000) @@ -91,7 +91,7 @@ ; EXPANDED: S_NOP 1 ; EXPANDED: bb.2: ; EXPANDED: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = SI_SPILL_V192_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 24 from %stack.0, align 4, addrspace 5) - ; EXPANDED: S_NOP 0, implicit renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 + ; EXPANDED: S_NOP 0, implicit killed renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 bb.0: S_NOP 0, implicit-def %0:vreg_192 S_CBRANCH_SCC1 implicit undef $scc, %bb.1 Index: llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/unexpected-reg-unit-state.mir @@ -0,0 +1,32 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=regallocfast -o - %s | FileCheck %s + +--- +name: bar +tracksRegLiveness: true 
+machineFunctionInfo: + isEntryFunction: true + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + stackPtrOffsetReg: '$sgpr32' +body: | + bb.0: + liveins: $vgpr0 + + ; CHECK-LABEL: name: bar + ; CHECK: liveins: $vgpr0 + ; CHECK: V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec + ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc + ; CHECK: SI_SPILL_S64_SAVE $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (store 8 into %stack.0, align 4, addrspace 5) + ; CHECK: renamable $sgpr4_sgpr5 = COPY $vcc + ; CHECK: $vcc = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr32 :: (load 8 from %stack.0, align 4, addrspace 5) + ; CHECK: renamable $vgpr0 = V_CNDMASK_B32_e64 0, -1, 0, 3, killed $sgpr4_sgpr5, implicit $exec + ; CHECK: S_ENDPGM 0, implicit killed $vgpr0, implicit killed renamable $vcc + %0:vgpr_32 = COPY $vgpr0 + V_CMP_NE_U32_e32 0, %0, implicit-def $vcc, implicit $exec + %3:sreg_64_xexec = COPY $vcc + %1:sreg_64_xexec = COPY $vcc + %2:vgpr_32 = V_CNDMASK_B32_e64 0, -1, 0, 3, %1, implicit $exec + $vgpr0 = COPY %2 + S_ENDPGM 0, implicit $vgpr0, implicit %3 + +... 
Index: llvm/test/CodeGen/AMDGPU/wwm-reserved.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -69,8 +69,8 @@ merge: %merge_value = phi i32 [ 0, %entry ], [%tmp137, %if ] ; GFX9-O3: v_cmp_eq_u32_e32 vcc, v[[FIRST]], v[[SECOND]] -; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[SECOND_IMM_OFFSET]] ; GFX9-O0: buffer_load_dword v[[FIRST:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[FIRST_IMM_OFFSET]] +; GFX9-O0: buffer_load_dword v[[SECOND:[0-9]+]], off, s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, 0 offset:[[SECOND_IMM_OFFSET]] ; GFX9-O0: v_cmp_eq_u32_e64 s{{\[}}{{[0-9]+}}:{{[0-9]+}}{{\]}}, v[[FIRST]], v[[SECOND]] %tmp138 = icmp eq i32 %tmp122, %merge_value %tmp139 = sext i1 %tmp138 to i32 @@ -82,7 +82,7 @@ } ; GFX9-LABEL: {{^}}called: -define i32 @called(i32 %a) noinline { +define hidden i32 @called(i32 %a) noinline { ; GFX9: v_add_u32_e32 v1, v0, v0 %add = add i32 %a, %a ; GFX9: v_mul_lo_u32 v0, v1, v0 @@ -94,10 +94,15 @@ ; GFX9-LABEL: {{^}}call: define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) { -; GFX9-O0: v_mov_b32_e32 v0, s0 -; GFX9-O3: v_mov_b32_e32 v2, s0 +; GFX9-DAG: s_load_dword [[ARG:s[0-9]+]] +; GFX9-O0-DAG: s_mov_b32 s0, 0{{$}} +; GFX9-O0-DAG: v_mov_b32_e32 v0, [[ARG]] + +; GFX9-O3: v_mov_b32_e32 v2, [[ARG]] + + ; GFX9-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s1 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s0 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) @@ -107,12 +112,11 @@ %tmp134 = call i32 @called(i32 %tmp107) ; GFX9-O0: buffer_load_dword v1 ; GFX9-O3: v_mov_b32_e32 v1, v0 -; GFX9-O0: v_add_u32_e32 v0, v0, v1 +; GFX9-O0: v_add_u32_e32 v1, v0, v1 ; GFX9-O3: v_add_u32_e32 v1, v1, v2 %tmp136 = add i32 %tmp134, %tmp107 %tmp137 = tail call i32 
@llvm.amdgcn.wwm.i32(i32 %tmp136) -; GFX9-O0: buffer_store_dword v2 -; GFX9-O3: buffer_store_dword v0 +; GFX9: buffer_store_dword v0 call void @llvm.amdgcn.raw.buffer.store.i32(i32 %tmp137, <4 x i32> %tmp14, i32 4, i32 0, i32 0) ret void } @@ -127,19 +131,24 @@ ; GFX9-LABEL: {{^}}call_i64: define amdgpu_kernel void @call_i64(<4 x i32> inreg %tmp14, i64 inreg %arg) { -; GFX9-O0: v_mov_b32_e32 v0, s0 -; GFX9-O0: v_mov_b32_e32 v1, s1 -; GFX9-O3: v_mov_b32_e32 v7, s1 -; GFX9-O3: v_mov_b32_e32 v6, s0 -; GFX9-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-O0-NEXT: v_mov_b32_e32 v1, s3 +; GFX9: s_load_dwordx2 s{{\[}}[[ARG_LO:[0-9]+]]:[[ARG_HI:[0-9]+]]{{\]}} + +; GFX9-O0: s_mov_b64 s{{\[}}[[ZERO_LO:[0-9]+]]:[[ZERO_HI:[0-9]+]]{{\]}}, 0{{$}} +; GFX9-O0: v_mov_b32_e32 v1, s[[ARG_LO]] +; GFX9-O0: v_mov_b32_e32 v2, s[[ARG_HI]] + +; GFX9-O3-DAG: v_mov_b32_e32 v7, s[[ARG_HI]] +; GFX9-O3-DAG: v_mov_b32_e32 v6, s[[ARG_LO]] + +; GFX9: s_not_b64 exec, exec +; GFX9-O0-NEXT: v_mov_b32_e32 v1, s[[ZERO_LO]] +; GFX9-O0-NEXT: v_mov_b32_e32 v2, s[[ZERO_HI]] ; GFX9-O3-NEXT: v_mov_b32_e32 v6, 0 ; GFX9-O3-NEXT: v_mov_b32_e32 v7, 0 ; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i64 @llvm.amdgcn.set.inactive.i64(i64 %arg, i64 0) -; GFX9-O0: buffer_store_dword v0 ; GFX9-O0: buffer_store_dword v1 +; GFX9-O0: buffer_store_dword v2 ; GFX9: s_swappc_b64 %tmp134 = call i64 @called_i64(i64 %tmp107) ; GFX9-O0: buffer_load_dword v4 Index: llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll =================================================================== --- llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll +++ llvm/test/CodeGen/ARM/2010-08-04-StackVariable.ll @@ -8,6 +8,9 @@ define i32 @_Z3fooi4SVal(i32 %i, %struct.SVal* noalias %location) #0 !dbg !4 { entry: %"alloca point" = bitcast i32 0 to i32 + br label %realentry + +realentry: call void @llvm.dbg.value(metadata i32 %i, metadata !21, metadata !DIExpression()), !dbg !22 call void @llvm.dbg.value(metadata 
%struct.SVal* %location, metadata !23, metadata !DIExpression()), !dbg !22 %tmp = icmp ne i32 %i, 0, !dbg !25 Index: llvm/test/CodeGen/ARM/Windows/alloca.ll =================================================================== --- llvm/test/CodeGen/ARM/Windows/alloca.ll +++ llvm/test/CodeGen/ARM/Windows/alloca.ll @@ -17,10 +17,11 @@ ; CHECK: bl num_entries ; Any register is actually valid here, but turns out we use lr, ; because we do not have the kill flag on R0. -; CHECK: movs [[R1:r1]], #7 -; CHECK: add.w [[R0:r[0-9]+]], [[R1]], [[R0]], lsl #2 -; CHECK: bic [[R0]], [[R0]], #4 -; CHECK: lsrs r4, [[R0]], #2 +; CHECK: mov [[R0:r[0-9]+]], r0 +; CHECK: movs [[R1:r[0-9]+]], #7 +; CHECK: add.w [[R2:r[0-9]+]], [[R1]], [[R0]], lsl #2 +; CHECK: bic [[R2]], [[R2]], #4 +; CHECK: lsrs r4, [[R2]], #2 ; CHECK: bl __chkstk ; CHECK: sub.w sp, sp, r4 Index: llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll =================================================================== --- llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll +++ llvm/test/CodeGen/ARM/cmpxchg-O0-be.ll @@ -7,12 +7,10 @@ ; CHECK_LABEL: main: ; CHECK: ldr [[R2:r[0-9]+]], {{\[}}[[R1:r[0-9]+]]{{\]}} ; CHECK-NEXT: ldr [[R1]], {{\[}}[[R1]], #4] -; CHECK: mov [[R4:r[0-9]+]], [[R2]] -; CHECK-NEXT: mov [[R5:r[0-9]+]], [[R1]] -; CHECK: ldr [[R2]], {{\[}}[[R1]]{{\]}} -; CHECK-NEXT: ldr [[R1]], {{\[}}[[R1]], #4] -; CHECK: mov [[R6:r[0-9]+]], [[R2]] -; CHECK-NEXT: mov [[R7:r[0-9]+]], [[R1]] +; CHECK: mov [[R4:r[0-9]+]], [[R1]] +; CHECK: ldr [[R5:r[0-9]+]], {{\[}}[[R1]]{{\]}} +; CHECK-NEXT: ldr [[R6:r[0-9]+]], {{\[}}[[R1]], #4] +; CHECK: mov [[R7:r[0-9]+]], [[R6]] define arm_aapcs_vfpcc i32 @main() #0 { entry: Index: llvm/test/CodeGen/ARM/cmpxchg-O0.ll =================================================================== --- llvm/test/CodeGen/ARM/cmpxchg-O0.ll +++ llvm/test/CodeGen/ARM/cmpxchg-O0.ll @@ -7,19 +7,21 @@ define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_8: +; CHECK-DAG: mov 
[[ADDR:r[0-9]+]], r0 +; CHECK-DAG: mov [[NEW:r[0-9]+]], r2 ; CHECK: dmb ish ; CHECK: uxtb [[DESIRED:r[0-9]+]], [[DESIRED]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexb [[OLD:[lr0-9]+]], [r0] +; CHECK: ldrexb [[OLD:[lr0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexb [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strexb [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: ; Materialisation of a boolean is done with sub/clz/lsr ; CHECK: uxtb [[CMP1:r[0-9]+]], [[DESIRED]] -; CHECK: sub{{(\.w)?}} [[CMP1]], [[OLD]], [[CMP1]] +; CHECK: sub{{(\.w|s)?}} [[CMP1]], [[OLD]], [[CMP1]] ; CHECK: clz [[CMP2:r[0-9]+]], [[CMP1]] ; CHECK: lsr{{(s)?}} {{r[0-9]+}}, [[CMP2]], #5 ; CHECK: dmb ish @@ -29,19 +31,21 @@ define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_16: +; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0 +; CHECK-DAG: mov [[NEW:r[0-9]+]], r2 ; CHECK: dmb ish ; CHECK: uxth [[DESIRED:r[0-9]+]], [[DESIRED]] ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexh [[OLD:[lr0-9]+]], [r0] +; CHECK: ldrexh [[OLD:[lr0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexh [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strexh [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: ; Materialisation of a boolean is done with sub/clz/lsr ; CHECK: uxth [[CMP1:r[0-9]+]], [[DESIRED]] -; CHECK: sub{{(\.w)?}} [[CMP1]], [[OLD]], [[CMP1]] +; CHECK: sub{{(\.w|s)?}} [[CMP1]], [[OLD]], [[CMP1]] ; CHECK: clz [[CMP2:r[0-9]+]], [[CMP1]] ; CHECK: lsr{{(s)?}} {{r[0-9]+}}, [[CMP2]], #5 ; CHECK: dmb ish @@ -51,13 +55,15 @@ define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_32: +; CHECK-DAG: mov [[ADDR:r[0-9]+]], r0 +; 
CHECK-DAG: mov [[NEW:r[0-9]+]], r2 ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrex [[OLD:r[0-9]+]], [r0] +; CHECK: ldrex [[OLD:r[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLD]], [[DESIRED]] ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0] +; CHECK: strex [[STATUS:r[0-9]+]], [[NEW]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -72,14 +78,15 @@ define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind { ; CHECK-LABEL: test_cmpxchg_64: +; CHECK: mov [[ADDR:r[0-9]+]], r0 ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0] +; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLDLO]], r6 ; CHECK: cmpeq [[OLDHI]], r7 ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexd [[STATUS:[lr0-9]+]], r4, r5, [r0] +; CHECK: strexd [[STATUS:[lr0-9]+]], r8, r9, [r1] ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: @@ -90,14 +97,15 @@ define { i64, i1 } @test_nontrivial_args(i64* %addr, i64 %desired, i64 %new) { ; CHECK-LABEL: test_nontrivial_args: +; CHECK: mov [[ADDR:r[0-9]+]], r0 ; CHECK: dmb ish ; CHECK-NOT: uxt ; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]: -; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0] +; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp [[OLDLO]], {{r[0-9]+}} ; CHECK: cmpeq [[OLDHI]], {{r[0-9]+}} ; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]] -; CHECK: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, [r0] +; CHECK: strexd [[STATUS:r[0-9]+]], {{r[0-9]+}}, {{r[0-9]+}}, {{\[}}[[ADDR]]{{\]}} ; CHECK: cmp{{(\.w)?}} [[STATUS]], #0 ; CHECK: bne [[RETRY]] ; CHECK: [[DONE]]: Index: llvm/test/CodeGen/ARM/crash-greedy-v6.ll =================================================================== --- 
llvm/test/CodeGen/ARM/crash-greedy-v6.ll +++ llvm/test/CodeGen/ARM/crash-greedy-v6.ll @@ -14,21 +14,21 @@ for.body: ; preds = %for.body, %for.body.lr.ph ; SOURCE-SCHED: ldr ; SOURCE-SCHED: ldr -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr ; SOURCE-SCHED: add +; SOURCE-SCHED: add +; SOURCE-SCHED: add +; SOURCE-SCHED: add +; SOURCE-SCHED: ldr ; SOURCE-SCHED: str ; SOURCE-SCHED: str ; SOURCE-SCHED: str ; SOURCE-SCHED: str -; SOURCE-SCHED: ldr ; SOURCE-SCHED: bl -; SOURCE-SCHED: add ; SOURCE-SCHED: ldr +; SOURCE-SCHED: add ; SOURCE-SCHED: cmp ; SOURCE-SCHED: bne %i.031 = phi i32 [ 0, %for.body.lr.ph ], [ %0, %for.body ] Index: llvm/test/CodeGen/ARM/debug-info-blocks.ll =================================================================== --- llvm/test/CodeGen/ARM/debug-info-blocks.ll +++ llvm/test/CodeGen/ARM/debug-info-blocks.ll @@ -6,8 +6,7 @@ ; CHECK: DW_TAG_variable ; CHECK-NOT: DW_TAG ; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] -; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18 -; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18 +; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): {{.*}} DW_OP_plus_uconst 0x4, DW_OP_deref, DW_OP_plus_uconst 0x18) ; CHECK-NEXT: DW_AT_name {{.*}} "mydata" ; Radar 9331779 Index: llvm/test/CodeGen/ARM/fast-isel-call.ll =================================================================== --- llvm/test/CodeGen/ARM/fast-isel-call.ll +++ llvm/test/CodeGen/ARM/fast-isel-call.ll @@ -41,38 +41,31 @@ ; ARM: foo ; THUMB: foo ;; Materialize i1 1 -; ARM: movw r2, #1 +; ARM: movw [[REG0:r[0-9]+]], #1 +; THUMB: movs [[REG0:r[0-9]+]], #1 ;; zero-ext -; ARM: and r2, r2, #1 -; THUMB: and r2, r2, #1 +; ARM: and [[REG1:r[0-9]+]], [[REG0]], #1 +; THUMB: and [[REG1:r[0-9]+]], [[REG0]], #1 %1 = call i32 @t0(i1 zeroext 1) -; ARM: sxtb r2, r1 -; ARM: mov r0, r2 -; THUMB: sxtb r2, r1 -; THUMB: 
mov r0, r2 +; ARM: sxtb r0, {{r[0-9]+}} +; THUMB: sxtb r0, {{r[0-9]+}} %2 = call i32 @t1(i8 signext %a) -; ARM: and r2, r1, #255 -; ARM: mov r0, r2 -; THUMB: and r2, r1, #255 -; THUMB: mov r0, r2 +; ARM: and r0, {{r[0-9]+}}, #255 +; THUMB: and r0, {{r[0-9]+}}, #255 %3 = call i32 @t2(i8 zeroext %a) -; ARM: sxth r2, r1 -; ARM: mov r0, r2 -; THUMB: sxth r2, r1 -; THUMB: mov r0, r2 +; ARM: sxth r0, {{r[0-9]+}} +; THUMB: sxth r0, {{r[0-9]+}} %4 = call i32 @t3(i16 signext %b) -; ARM: uxth r2, r1 -; ARM: mov r0, r2 -; THUMB: uxth r2, r1 -; THUMB: mov r0, r2 +; ARM: uxth r0, {{r[0-9]+}} +; THUMB: uxth r0, {{r[0-9]+}} %5 = call i32 @t4(i16 zeroext %b) ;; A few test to check materialization ;; Note: i1 1 was materialized with t1 call -; ARM: movw r1, #255 +; ARM: movw {{r[0-9]+}}, #255 %6 = call i32 @t2(i8 zeroext 255) -; ARM: movw r1, #65535 -; THUMB: movw r1, #65535 +; ARM: movw {{r[0-9]+}}, #65535 +; THUMB: movw {{r[0-9]+}}, #65535 %7 = call i32 @t4(i16 zeroext 65535) ret void } @@ -112,10 +105,9 @@ ; ARM: bl {{_?}}bar ; ARM-LONG-LABEL: @t10 -; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} -; ARM-LONG-MACHO: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} -; ARM-LONG-MACHO: str [[R]], [r7, [[SLOT:#[-0-9]+]]] @ 4-byte Spill -; ARM-LONG-MACHO: ldr [[R:l?r[0-9]*]], [r7, [[SLOT]]] @ 4-byte Reload +; ARM-LONG-MACHO: {{(movw)|(ldr)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} +; ARM-LONG-MACHO: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}} +; ARM-LONG-MACHO: ldr [[R:r[0-9]+]], {{\[}}[[R1]]] ; ARM-LONG-ELF: movw [[R:l?r[0-9]*]], :lower16:bar ; ARM-LONG-ELF: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} @@ -138,11 +130,9 @@ ; THUMB-DAG: str.w [[R4]], [sp, #4] ; THUMB: bl {{_?}}bar ; THUMB-LONG-LABEL: @t10 -; THUMB-LONG: {{(movw)|(ldr.n)}} [[R:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} -; THUMB-LONG: {{(movt [[R]], :upper16:L_bar\$non_lazy_ptr)?}} -; THUMB-LONG: ldr{{(.w)?}} [[R]], {{\[}}[[R]]{{\]}} 
-; THUMB-LONG: str [[R]], [sp, [[SLOT:#[-0-9]+]]] @ 4-byte Spill -; THUMB-LONG: ldr.w [[R:l?r[0-9]*]], [sp, [[SLOT]]] @ 4-byte Reload +; THUMB-LONG: {{(movw)|(ldr.n)}} [[R1:l?r[0-9]*]], {{(:lower16:L_bar\$non_lazy_ptr)|(.LCPI)}} +; THUMB-LONG: {{(movt [[R1]], :upper16:L_bar\$non_lazy_ptr)?}} +; THUMB-LONG: ldr{{(.w)?}} [[R:r[0-9]+]], {{\[}}[[R1]]{{\]}} ; THUMB-LONG: blx [[R]] %call = call i32 @bar(i8 zeroext 0, i8 zeroext -8, i8 zeroext -69, i8 zeroext 28, i8 zeroext 40, i8 zeroext -70) ret i32 0 Index: llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll =================================================================== --- llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll +++ llvm/test/CodeGen/ARM/fast-isel-intrinsic.ll @@ -55,16 +55,13 @@ ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] +; ARM-MACHO: ldr [[REG1:r[0-9]+]], [r0] -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp +; ARM-ELF: movw [[REG1:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG1]], :upper16:temp -; ARM: add r1, r0, #4 -; ARM: add r0, r0, #16 -; ARM: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill -; ARM: mov r0, r1 -; ARM: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; ARM: add r0, [[REG1]], #4 +; ARM: add r1, [[REG1]], #16 ; ARM: movw r2, #17 ; ARM: bl {{_?}}memcpy ; ARM-LONG-LABEL: t2: @@ -80,12 +77,9 @@ ; THUMB-LABEL: t2: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: adds r1, r0, #4 -; THUMB: adds r0, #16 -; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill -; THUMB: mov r0, r1 -; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: adds r0, [[REG1]], #4 +; THUMB: adds r1, #16 ; THUMB: movs r2, #17 ; THUMB: bl {{_?}}memcpy ; THUMB-LONG-LABEL: t2: @@ -104,15 +98,14 @@ ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, 
.LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp +; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], :upper16:temp -; ARM: add r1, r0, #4 -; ARM: add r0, r0, #16 -; ARM: mov r0, r1 +; ARM: add r0, [[REG0]], #4 +; ARM: add r1, [[REG0]], #16 ; ARM: movw r2, #10 ; ARM: bl {{_?}}memmove ; ARM-LONG-LABEL: t3: @@ -128,12 +121,9 @@ ; THUMB-LABEL: t3: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: adds r1, r0, #4 -; THUMB: adds r0, #16 -; THUMB: str r0, [sp[[SLOT:[, #0-9]*]]] @ 4-byte Spill -; THUMB: mov r0, r1 -; THUMB: ldr r1, [sp[[SLOT]]] @ 4-byte Reload +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: adds r0, [[REG1]], #4 +; THUMB: adds r1, #16 ; THUMB: movs r2, #10 ; THUMB: bl {{_?}}memmove ; THUMB-LONG-LABEL: t3: @@ -150,28 +140,28 @@ ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp +; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], :upper16:temp -; ARM: ldr r1, [r0, #16] -; ARM: str r1, [r0, #4] -; ARM: ldr r1, [r0, #20] -; ARM: str r1, [r0, #8] -; ARM: ldrh r1, [r0, #24] -; ARM: strh r1, [r0, #12] +; ARM: ldr [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16] +; ARM: str [[REG1]], {{\[}}[[REG0]], #4] +; ARM: ldr [[REG2:r[0-9]+]], {{\[}}[[REG0]], #20] +; ARM: str [[REG2]], {{\[}}[[REG0]], #8] +; ARM: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #24] +; ARM: strh [[REG3]], {{\[}}[[REG0]], #12] ; ARM: bx lr ; THUMB-LABEL: t4: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; 
THUMB: ldr r0, [r0] -; THUMB: ldr r1, [r0, #16] -; THUMB: str r1, [r0, #4] -; THUMB: ldr r1, [r0, #20] -; THUMB: str r1, [r0, #8] -; THUMB: ldrh r1, [r0, #24] -; THUMB: strh r1, [r0, #12] +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: ldr [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16] +; THUMB: str [[REG2]], {{\[}}[[REG1]], #4] +; THUMB: ldr [[REG3:r[0-9]+]], {{\[}}[[REG1]], #20] +; THUMB: str [[REG3]], {{\[}}[[REG1]], #8] +; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #24] +; THUMB: strh [[REG4]], {{\[}}[[REG1]], #12] ; THUMB: bx lr call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 4 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void @@ -184,36 +174,36 @@ ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] - -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp - -; ARM: ldrh r1, [r0, #16] -; ARM: strh r1, [r0, #4] -; ARM: ldrh r1, [r0, #18] -; ARM: strh r1, [r0, #6] -; ARM: ldrh r1, [r0, #20] -; ARM: strh r1, [r0, #8] -; ARM: ldrh r1, [r0, #22] -; ARM: strh r1, [r0, #10] -; ARM: ldrh r1, [r0, #24] -; ARM: strh r1, [r0, #12] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] + +; ARM-ELF: movw [[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], :upper16:temp + +; ARM: ldrh [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16] +; ARM: strh [[REG1]], {{\[}}[[REG0]], #4] +; ARM: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG0]], #18] +; ARM: strh [[REG2]], {{\[}}[[REG0]], #6] +; ARM: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG0]], #20] +; ARM: strh [[REG3]], {{\[}}[[REG0]], #8] +; ARM: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG0]], #22] +; ARM: strh [[REG4]], {{\[}}[[REG0]], #10] +; ARM: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG0]], #24] +; ARM: strh [[REG5]], {{\[}}[[REG0]], #12] ; ARM: bx lr ; THUMB-LABEL: t5: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; 
THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: ldrh r1, [r0, #16] -; THUMB: strh r1, [r0, #4] -; THUMB: ldrh r1, [r0, #18] -; THUMB: strh r1, [r0, #6] -; THUMB: ldrh r1, [r0, #20] -; THUMB: strh r1, [r0, #8] -; THUMB: ldrh r1, [r0, #22] -; THUMB: strh r1, [r0, #10] -; THUMB: ldrh r1, [r0, #24] -; THUMB: strh r1, [r0, #12] +; THUMB: ldr [[REG1:r[0-9]+]], [r0] +; THUMB: ldrh [[REG2:r[0-9]+]], {{\[}}[[REG1]], #16] +; THUMB: strh [[REG2]], {{\[}}[[REG1]], #4] +; THUMB: ldrh [[REG3:r[0-9]+]], {{\[}}[[REG1]], #18] +; THUMB: strh [[REG3]], {{\[}}[[REG1]], #6] +; THUMB: ldrh [[REG4:r[0-9]+]], {{\[}}[[REG1]], #20] +; THUMB: strh [[REG4]], {{\[}}[[REG1]], #8] +; THUMB: ldrh [[REG5:r[0-9]+]], {{\[}}[[REG1]], #22] +; THUMB: strh [[REG5]], {{\[}}[[REG1]], #10] +; THUMB: ldrh [[REG6:r[0-9]+]], {{\[}}[[REG1]], #24] +; THUMB: strh [[REG6]], {{\[}}[[REG1]], #12] ; THUMB: bx lr call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 2 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void @@ -224,56 +214,56 @@ ; ARM-MACHO: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr r0, .LCPI)}} ; ARM-MACHO: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; ARM-MACHO: ldr r0, [r0] - -; ARM-ELF: movw r0, :lower16:temp -; ARM-ELF: movt r0, :upper16:temp - -; ARM: ldrb r1, [r0, #16] -; ARM: strb r1, [r0, #4] -; ARM: ldrb r1, [r0, #17] -; ARM: strb r1, [r0, #5] -; ARM: ldrb r1, [r0, #18] -; ARM: strb r1, [r0, #6] -; ARM: ldrb r1, [r0, #19] -; ARM: strb r1, [r0, #7] -; ARM: ldrb r1, [r0, #20] -; ARM: strb r1, [r0, #8] -; ARM: ldrb r1, [r0, #21] -; ARM: strb r1, [r0, #9] -; ARM: ldrb r1, [r0, #22] -; ARM: strb r1, [r0, #10] -; ARM: ldrb r1, [r0, #23] -; ARM: strb r1, [r0, #11] -; ARM: ldrb r1, [r0, #24] -; ARM: strb r1, [r0, #12] -; ARM: ldrb r1, [r0, #25] -; ARM: strb r1, [r0, #13] +; ARM-MACHO: ldr [[REG0:r[0-9]+]], [r0] + +; ARM-ELF: movw 
[[REG0:r[0-9]+]], :lower16:temp +; ARM-ELF: movt [[REG0]], :upper16:temp + +; ARM: ldrb [[REG1:r[0-9]+]], {{\[}}[[REG0]], #16] +; ARM: strb [[REG1]], {{\[}}[[REG0]], #4] +; ARM: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #17] +; ARM: strb [[REG2]], {{\[}}[[REG0]], #5] +; ARM: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #18] +; ARM: strb [[REG3]], {{\[}}[[REG0]], #6] +; ARM: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #19] +; ARM: strb [[REG4]], {{\[}}[[REG0]], #7] +; ARM: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #20] +; ARM: strb [[REG5]], {{\[}}[[REG0]], #8] +; ARM: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #21] +; ARM: strb [[REG6]], {{\[}}[[REG0]], #9] +; ARM: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #22] +; ARM: strb [[REG7]], {{\[}}[[REG0]], #10] +; ARM: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #23] +; ARM: strb [[REG8]], {{\[}}[[REG0]], #11] +; ARM: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #24] +; ARM: strb [[REG9]], {{\[}}[[REG0]], #12] +; ARM: ldrb [[REG10:r[0-9]+]], {{\[}}[[REG0]], #25] +; ARM: strb [[REG10]], {{\[}}[[REG0]], #13] ; ARM: bx lr ; THUMB-LABEL: t6: ; THUMB: {{(movw r0, :lower16:L_temp\$non_lazy_ptr)|(ldr.n r0, .LCPI)}} ; THUMB: {{(movt r0, :upper16:L_temp\$non_lazy_ptr)?}} -; THUMB: ldr r0, [r0] -; THUMB: ldrb r1, [r0, #16] -; THUMB: strb r1, [r0, #4] -; THUMB: ldrb r1, [r0, #17] -; THUMB: strb r1, [r0, #5] -; THUMB: ldrb r1, [r0, #18] -; THUMB: strb r1, [r0, #6] -; THUMB: ldrb r1, [r0, #19] -; THUMB: strb r1, [r0, #7] -; THUMB: ldrb r1, [r0, #20] -; THUMB: strb r1, [r0, #8] -; THUMB: ldrb r1, [r0, #21] -; THUMB: strb r1, [r0, #9] -; THUMB: ldrb r1, [r0, #22] -; THUMB: strb r1, [r0, #10] -; THUMB: ldrb r1, [r0, #23] -; THUMB: strb r1, [r0, #11] -; THUMB: ldrb r1, [r0, #24] -; THUMB: strb r1, [r0, #12] -; THUMB: ldrb r1, [r0, #25] -; THUMB: strb r1, [r0, #13] +; THUMB: ldr [[REG0:r[0-9]+]], [r0] +; THUMB: ldrb [[REG2:r[0-9]+]], {{\[}}[[REG0]], #16] +; THUMB: strb [[REG2]], {{\[}}[[REG0]], #4] +; THUMB: ldrb [[REG3:r[0-9]+]], {{\[}}[[REG0]], #17] +; THUMB: strb 
[[REG3]], {{\[}}[[REG0]], #5] +; THUMB: ldrb [[REG4:r[0-9]+]], {{\[}}[[REG0]], #18] +; THUMB: strb [[REG4]], {{\[}}[[REG0]], #6] +; THUMB: ldrb [[REG5:r[0-9]+]], {{\[}}[[REG0]], #19] +; THUMB: strb [[REG5]], {{\[}}[[REG0]], #7] +; THUMB: ldrb [[REG6:r[0-9]+]], {{\[}}[[REG0]], #20] +; THUMB: strb [[REG6]], {{\[}}[[REG0]], #8] +; THUMB: ldrb [[REG7:r[0-9]+]], {{\[}}[[REG0]], #21] +; THUMB: strb [[REG7]], {{\[}}[[REG0]], #9] +; THUMB: ldrb [[REG8:r[0-9]+]], {{\[}}[[REG0]], #22] +; THUMB: strb [[REG8]], {{\[}}[[REG0]], #10] +; THUMB: ldrb [[REG9:r[0-9]+]], {{\[}}[[REG0]], #23] +; THUMB: strb [[REG9]], {{\[}}[[REG0]], #11] +; THUMB: ldrb [[REG10:r[0-9]+]], {{\[}}[[REG0]], #24] +; THUMB: strb [[REG10]], {{\[}}[[REG0]], #12] +; THUMB: ldrb [[REG11:r[0-9]+]], {{\[}}[[REG0]], #25] +; THUMB: strb [[REG11]], {{\[}}[[REG0]], #13] ; THUMB: bx lr call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 4), i8* align 1 getelementptr inbounds ([60 x i8], [60 x i8]* @temp, i32 0, i32 16), i32 10, i1 false) ret void Index: llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll =================================================================== --- llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll +++ llvm/test/CodeGen/ARM/fast-isel-ldr-str-thumb-neg-index.ll @@ -2,7 +2,7 @@ define i32 @t1(i32* nocapture %ptr) nounwind readonly { entry: -; THUMB: t1 +; THUMB-LABEL: t1: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1 %0 = load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0, #-4] @@ -11,7 +11,7 @@ define i32 @t2(i32* nocapture %ptr) nounwind readonly { entry: -; THUMB: t2 +; THUMB-LABEL: t2: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -63 %0 = load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0, #-252] @@ -20,7 +20,7 @@ define i32 @t3(i32* nocapture %ptr) nounwind readonly { entry: -; THUMB: t3 +; THUMB-LABEL: t3: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64 %0 = 
load i32, i32* %add.ptr, align 4 ; THUMB: ldr r{{[0-9]}}, [r0] @@ -29,7 +29,7 @@ define zeroext i16 @t4(i16* nocapture %ptr) nounwind readonly { entry: -; THUMB: t4 +; THUMB-LABEL: t4: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1 %0 = load i16, i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0, #-2] @@ -38,7 +38,7 @@ define zeroext i16 @t5(i16* nocapture %ptr) nounwind readonly { entry: -; THUMB: t5 +; THUMB-LABEL: t5: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127 %0 = load i16, i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0, #-254] @@ -47,7 +47,7 @@ define zeroext i16 @t6(i16* nocapture %ptr) nounwind readonly { entry: -; THUMB: t6 +; THUMB-LABEL: t6: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128 %0 = load i16, i16* %add.ptr, align 2 ; THUMB: ldrh r{{[0-9]}}, [r0] @@ -56,7 +56,7 @@ define zeroext i8 @t7(i8* nocapture %ptr) nounwind readonly { entry: -; THUMB: t7 +; THUMB-LABEL: t7: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1 %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0, #-1] @@ -65,7 +65,7 @@ define zeroext i8 @t8(i8* nocapture %ptr) nounwind readonly { entry: -; THUMB: t8 +; THUMB-LABEL: t8: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -255 %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0, #-255] @@ -74,7 +74,7 @@ define zeroext i8 @t9(i8* nocapture %ptr) nounwind readonly { entry: -; THUMB: t9 +; THUMB-LABEL: t9: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256 %0 = load i8, i8* %add.ptr, align 1 ; THUMB: ldrb r{{[0-9]}}, [r0] @@ -83,81 +83,96 @@ define void @t10(i32* nocapture %ptr) nounwind { entry: -; THUMB: t10 +; THUMB-LABEL: t10: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -1 store i32 0, i32* %add.ptr, align 4 -; THUMB: str r{{[0-9]}}, [r0, #-4] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-4] ret void } define void @t11(i32* nocapture %ptr) nounwind { entry: -; THUMB: t11 +; THUMB-LABEL: t11: %add.ptr = 
getelementptr inbounds i32, i32* %ptr, i32 -63 store i32 0, i32* %add.ptr, align 4 -; THUMB: str r{{[0-9]}}, [r0, #-252] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: str r{{[0-9]}}, {{\[}}[[REG]], #-252] ret void } define void @t12(i32* nocapture %ptr) nounwind { entry: -; THUMB: t12 +; THUMB-LABEL: t12: %add.ptr = getelementptr inbounds i32, i32* %ptr, i32 -64 store i32 0, i32* %add.ptr, align 4 -; THUMB: str r{{[0-9]}}, [r0] +; THUMB: movw [[REG:r[0-9]+]], #65280 +; THUMB: movt [[REG]], #65535 +; THUMB: add [[REG]], r0 +; THUMB: str r{{[0-9]}}, {{\[}}[[REG]]] ret void } define void @t13(i16* nocapture %ptr) nounwind { entry: -; THUMB: t13 +; THUMB-LABEL: t13: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -1 store i16 0, i16* %add.ptr, align 2 -; THUMB: strh r{{[0-9]}}, [r0, #-2] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-2] ret void } define void @t14(i16* nocapture %ptr) nounwind { entry: -; THUMB: t14 +; THUMB-LABEL: t14: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -127 store i16 0, i16* %add.ptr, align 2 -; THUMB: strh r{{[0-9]}}, [r0, #-254] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]], #-254] ret void } define void @t15(i16* nocapture %ptr) nounwind { entry: -; THUMB: t15 +; THUMB-LABEL: t15: %add.ptr = getelementptr inbounds i16, i16* %ptr, i32 -128 store i16 0, i16* %add.ptr, align 2 -; THUMB: strh r{{[0-9]}}, [r0] +; THUMB: movw [[REG:r[0-9]+]], #65280 +; THUMB: movt [[REG]], #65535 +; THUMB: add [[REG]], r0 +; THUMB: strh r{{[0-9]}}, {{\[}}[[REG]]] ret void } define void @t16(i8* nocapture %ptr) nounwind { entry: -; THUMB: t16 +; THUMB-LABEL: t16: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -1 store i8 0, i8* %add.ptr, align 1 -; THUMB: strb r{{[0-9]}}, [r0, #-1] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-1] ret void } define void @t17(i8* nocapture %ptr) nounwind { entry: -; THUMB: t17 +; THUMB-LABEL: t17: %add.ptr = getelementptr 
inbounds i8, i8* %ptr, i32 -255 store i8 0, i8* %add.ptr, align 1 -; THUMB: strb r{{[0-9]}}, [r0, #-255] +; THUMB: mov [[REG:r[0-9]+]], r0 +; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]], #-255] ret void } define void @t18(i8* nocapture %ptr) nounwind { entry: -; THUMB: t18 +; THUMB-LABEL: t18: %add.ptr = getelementptr inbounds i8, i8* %ptr, i32 -256 store i8 0, i8* %add.ptr, align 1 -; THUMB: strb r{{[0-9]}}, [r0] +; THUMB: movw [[REG:r[0-9]+]], #65280 +; THUMB: movt [[REG]], #65535 +; THUMB: add [[REG]], r0 +; THUMB: strb r{{[0-9]}}, {{\[}}[[REG]]] ret void } Index: llvm/test/CodeGen/ARM/fast-isel-select.ll =================================================================== --- llvm/test/CodeGen/ARM/fast-isel-select.ll +++ llvm/test/CodeGen/ARM/fast-isel-select.ll @@ -21,14 +21,12 @@ define i32 @t2(i1 %c, i32 %a) nounwind readnone { entry: ; ARM: t2 -; ARM: tst r0, #1 -; ARM: moveq r{{[1-9]}}, #20 -; ARM: mov r0, r{{[1-9]}} +; ARM: tst {{r[0-9]+}}, #1 +; ARM: moveq {{r[0-9]+}}, #20 ; THUMB-LABEL: t2 -; THUMB: tst.w r0, #1 +; THUMB: tst.w {{r[0-9]+}}, #1 ; THUMB: it eq -; THUMB: moveq r{{[1-9]}}, #20 -; THUMB: mov r0, r{{[1-9]}} +; THUMB: moveq {{r[0-9]+}}, #20 %0 = select i1 %c, i32 %a, i32 20 ret i32 %0 } @@ -43,7 +41,7 @@ ; THUMB: tst.w r0, #1 ; THUMB: it ne ; THUMB: movne r2, r1 -; THUMB: add.w r0, r2, r1 +; THUMB: adds r0, r2, r1 %0 = select i1 %c, i32 %a, i32 %b %1 = add i32 %0, %a ret i32 %1 @@ -67,14 +65,12 @@ define i32 @t5(i1 %c, i32 %a) nounwind readnone { entry: ; ARM: t5 -; ARM: tst r0, #1 -; ARM: mvneq r{{[1-9]}}, #1 -; ARM: mov r0, r{{[1-9]}} +; ARM: tst {{r[0-9]+}}, #1 +; ARM: mvneq {{r[0-9]+}}, #1 ; THUMB: t5 -; THUMB: tst.w r0, #1 +; THUMB: tst.w {{r[0-9]+}}, #1 ; THUMB: it eq -; THUMB: mvneq r{{[1-9]}}, #1 -; THUMB: mov r0, r{{[1-9]}} +; THUMB: mvneq {{r[0-9]+}}, #1 %0 = select i1 %c, i32 %a, i32 -2 ret i32 %0 } @@ -83,14 +79,12 @@ define i32 @t6(i1 %c, i32 %a) nounwind readnone { entry: ; ARM: t6 -; ARM: tst r0, #1 -; ARM: mvneq r{{[1-9]}}, #978944 -; 
ARM: mov r0, r{{[1-9]}} +; ARM: tst {{r[0-9]+}}, #1 +; ARM: mvneq {{r[0-9]+}}, #978944 ; THUMB: t6 -; THUMB: tst.w r0, #1 +; THUMB: tst.w {{r[0-9]+}}, #1 ; THUMB: it eq -; THUMB: mvneq r{{[1-9]}}, #978944 -; THUMB: mov r0, r{{[1-9]}} +; THUMB: mvneq {{r[0-9]+}}, #978944 %0 = select i1 %c, i32 %a, i32 -978945 ret i32 %0 } Index: llvm/test/CodeGen/ARM/fast-isel-vararg.ll =================================================================== --- llvm/test/CodeGen/ARM/fast-isel-vararg.ll +++ llvm/test/CodeGen/ARM/fast-isel-vararg.ll @@ -17,26 +17,24 @@ %4 = load i32, i32* %n, align 4 ; ARM: VarArg ; ARM: mov [[FP:r[0-9]+]], sp -; ARM: sub sp, sp, #{{(36|40)}} +; ARM: sub sp, sp, #32 ; ARM: ldr r1, {{\[}}[[FP]], #-4] ; ARM: ldr r2, {{\[}}[[FP]], #-8] ; ARM: ldr r3, {{\[}}[[FP]], #-12] -; ARM: ldr [[Ra:r[0-9]+]], {{\[}}[[FP]], #-16] -; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #{{(16|20)}}] -; ARM: movw [[Rc:[lr]+[0-9]*]], #5 -; Ra got spilled -; ARM: mov r0, [[Rc]] -; ARM: str {{.*}}, [sp] +; ARM: ldr [[Ra:r[0-9]+|lr]], [sp, #16] +; ARM: ldr [[Rb:[lr]+[0-9]*]], [sp, #12] +; ARM: movw r0, #5 +; ARM: str [[Ra]], [sp] ; ARM: str [[Rb]], [sp, #4] ; ARM: bl {{_?CallVariadic}} -; THUMB: sub sp, #{{36}} -; THUMB: ldr r1, [sp, #32] -; THUMB: ldr r2, [sp, #28] -; THUMB: ldr r3, [sp, #24] -; THUMB: ldr {{[a-z0-9]+}}, [sp, #20] -; THUMB: ldr.w {{[a-z0-9]+}}, [sp, #16] -; THUMB: str.w {{[a-z0-9]+}}, [sp] -; THUMB: str.w {{[a-z0-9]+}}, [sp, #4] +; THUMB: sub sp, #32 +; THUMB: ldr r1, [sp, #28] +; THUMB: ldr r2, [sp, #24] +; THUMB: ldr r3, [sp, #20] +; THUMB: ldr.w [[Ra:r[0-9]+]], [sp, #16] +; THUMB: ldr.w [[Rb:r[0-9]+]], [sp, #12] +; THUMB: str.w [[Ra]], [sp] +; THUMB: str.w [[Rb]], [sp, #4] ; THUMB: bl {{_?}}CallVariadic %call = call i32 (i32, ...) 
@CallVariadic(i32 5, i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) store i32 %call, i32* %tmp, align 4 Index: llvm/test/CodeGen/ARM/ldrd.ll =================================================================== --- llvm/test/CodeGen/ARM/ldrd.ll +++ llvm/test/CodeGen/ARM/ldrd.ll @@ -81,11 +81,12 @@ ; CHECK-LABEL: Func1: define void @Func1() nounwind ssp "frame-pointer"="all" { entry: -; A8: movw [[BASE:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}} -; A8: movt [[BASE]], :upper16:{{.*}}TestVar{{.*}} +; A8: movw [[BASER:r[0-9]+]], :lower16:{{.*}}TestVar{{.*}} +; A8: movt [[BASER]], :upper16:{{.*}}TestVar{{.*}} +; A8: ldr [[BASE:r[0-9]+]], {{\[}}[[BASER]]] ; A8: ldrd [[FIELD1:r[0-9]+]], [[FIELD2:r[0-9]+]], {{\[}}[[BASE]], #4] -; A8-NEXT: add [[FIELD1]], [[FIELD2]] -; A8-NEXT: str [[FIELD1]], {{\[}}[[BASE]]{{\]}} +; A8-NEXT: add [[FIELD2]], [[FIELD1]] +; A8-NEXT: str [[FIELD2]], {{\[}}[[BASE]]{{\]}} ; CONSERVATIVE-NOT: ldrd %orig_blocks = alloca [256 x i16], align 2 %0 = bitcast [256 x i16]* %orig_blocks to i8*call void @llvm.lifetime.start.p0i8(i64 512, i8* %0) nounwind Index: llvm/test/CodeGen/ARM/legalize-bitcast.ll =================================================================== --- llvm/test/CodeGen/ARM/legalize-bitcast.ll +++ llvm/test/CodeGen/ARM/legalize-bitcast.ll @@ -14,17 +14,17 @@ ; CHECK-NEXT: ldr r0, [r0] ; CHECK-NEXT: @ implicit-def: $d17 ; CHECK-NEXT: vmov.32 d17[0], r0 -; CHECK-NEXT: vrev32.16 d17, d17 +; CHECK-NEXT: vrev32.16 d18, d17 ; CHECK-NEXT: vrev16.8 d16, d16 -; CHECK-NEXT: vmov.f64 d18, d16 -; CHECK-NEXT: vmov.f64 d19, d17 -; CHECK-NEXT: vstmia sp, {d18, d19} @ 16-byte Spill +; CHECK-NEXT: @ kill: def $d16 killed $d16 def $q8 +; CHECK-NEXT: vmov.f64 d17, d18 +; CHECK-NEXT: vstmia sp, {d16, d17} @ 16-byte Spill ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_1: @ %bb.1 ; CHECK-NEXT: vldmia sp, {d16, d17} @ 16-byte Reload -; CHECK-NEXT: vrev32.16 q9, q8 -; CHECK-NEXT: @ kill: def $d19 killed $d19 killed $q9 -; CHECK-NEXT: vmov.32 r0, d19[0] +; CHECK-NEXT: 
vrev32.16 q8, q8 +; CHECK-NEXT: vmov.f64 d16, d17 +; CHECK-NEXT: vmov.32 r0, d16[0] ; CHECK-NEXT: add sp, sp, #28 ; CHECK-NEXT: pop {r4} ; CHECK-NEXT: bx lr @@ -41,15 +41,15 @@ define i16 @int_to_vec(i80 %in) { ; CHECK-LABEL: int_to_vec: ; CHECK: @ %bb.0: -; CHECK-NEXT: mov r3, r1 -; CHECK-NEXT: mov r12, r0 +; CHECK-NEXT: @ kill: def $r2 killed $r1 +; CHECK-NEXT: @ kill: def $r2 killed $r0 ; CHECK-NEXT: lsl r0, r0, #16 ; CHECK-NEXT: orr r0, r0, r1, lsr #16 -; CHECK-NEXT: @ implicit-def: $d16 -; CHECK-NEXT: vmov.32 d16[0], r0 -; CHECK-NEXT: @ implicit-def: $q9 -; CHECK-NEXT: vmov.f64 d18, d16 -; CHECK-NEXT: vrev32.16 q8, q9 +; CHECK-NEXT: @ implicit-def: $d18 +; CHECK-NEXT: vmov.32 d18[0], r0 +; CHECK-NEXT: @ implicit-def: $q8 +; CHECK-NEXT: vmov.f64 d16, d18 +; CHECK-NEXT: vrev32.16 q8, q8 ; CHECK-NEXT: @ kill: def $d16 killed $d16 killed $q8 ; CHECK-NEXT: vmov.u16 r0, d16[0] ; CHECK-NEXT: bx lr Index: llvm/test/CodeGen/ARM/stack-guard-reassign.ll =================================================================== --- llvm/test/CodeGen/ARM/stack-guard-reassign.ll +++ llvm/test/CodeGen/ARM/stack-guard-reassign.ll @@ -3,11 +3,12 @@ ; Verify that the offset assigned to the stack protector is at the top of the ; frame, covering the locals. 
; CHECK-LABEL: fn: -; CHECK: sub sp, sp, #32 +; CHECK: sub sp, sp, #24 ; CHECK-NEXT: sub sp, sp, #65536 ; CHECK-NEXT: ldr r1, .LCPI0_0 -; CHECK-NEXT: ldr r2, [r1] +; CHECK-NEXT: str r1, [sp, #8] +; CHECK-NEXT: ldr r1, [r1] ; CHECK-NEXT: add lr, sp, #65536 -; CHECK-NEXT: str r2, [lr, #28] +; CHECK-NEXT: str r1, [lr, #20] ; CHECK: .LCPI0_0: ; CHECK-NEXT: .long __stack_chk_guard Index: llvm/test/CodeGen/ARM/swifterror.ll =================================================================== --- llvm/test/CodeGen/ARM/swifterror.ll +++ llvm/test/CodeGen/ARM/swifterror.ll @@ -21,9 +21,9 @@ ; CHECK-O0: mov r{{.*}}, #16 ; CHECK-O0: malloc ; CHECK-O0: mov [[ID2:r[0-9]+]], r0 -; CHECK-O0: mov [[ID:r[0-9]+]], #1 -; CHECK-O0: strb [[ID]], [r0, #8] ; CHECK-O0: mov r8, [[ID2]] +; CHECK-O0: mov [[ID:r[0-9]+]], #1 +; CHECK-O0: strb [[ID]], {{\[}}[[ID2]], #8] entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -49,16 +49,16 @@ ; CHECK-O0-LABEL: caller: ; spill r0 ; CHECK-O0-DAG: mov r8, #0 -; CHECK-O0-DAG: str r0, [sp, [[SLOT:#[0-9]+]] +; CHECK-O0-DAG: str r0, [sp[[SLOT:(, #[0-9]+)?]]] ; CHECK-O0: bl {{.*}}foo ; CHECK-O0: mov [[TMP:r[0-9]+]], r8 -; CHECK-O0: str [[TMP]], [sp] +; CHECK-O0: str [[TMP]], [sp[[SLOT2:(, #[0-9]+)?]]] ; CHECK-O0: bne +; CHECK-O0: ldr [[ID:r[0-9]+]], [sp[[SLOT]]] ; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8] -; CHECK-O0: ldr [[ID:r[0-9]+]], [sp, [[SLOT]]] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] ; reload r0 -; CHECK-O0: ldr r0, [sp] +; CHECK-O0: ldr r0, [sp[[SLOT2]]] ; CHECK-O0: free entry: %error_ptr_ref = alloca swifterror %swift_error* @@ -98,14 +98,14 @@ ; CHECK-O0-DAG: mov r8, #0 ; CHECK-O0: bl {{.*}}foo ; CHECK-O0: mov r{{.*}}, r8 -; CHECK-O0: str r0, [sp] +; CHECK-O0: str r0, [sp{{(, #[0-9]+)?}}] ; CHECK-O0: bne ; CHECK-O0: ble -; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8] ; reload r0 ; CHECK-O0: ldr [[ID:r[0-9]+]], +; CHECK-O0: ldrb [[CODE:r[0-9]+]], [r0, #8] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] -; CHECK-O0: ldr 
r0, [sp] +; CHECK-O0: ldr r0, [sp{{(, #[0-9]+)?}}] ; CHECK-O0: free entry: %error_ptr_ref = alloca swifterror %swift_error* @@ -143,16 +143,15 @@ ; CHECK-APPLE-DAG: strb [[ID]], [r{{.*}}, #8] ; CHECK-O0-LABEL: foo_if: -; CHECK-O0: cmp r0, #0 ; spill to stack ; CHECK-O0: str r8 +; CHECK-O0: cmp r0, #0 ; CHECK-O0: beq ; CHECK-O0: mov r0, #16 ; CHECK-O0: malloc ; CHECK-O0: mov [[ID:r[0-9]+]], r0 ; CHECK-O0: mov [[ID2:[a-z0-9]+]], #1 -; CHECK-O0: strb [[ID2]], [r0, #8] -; CHECK-O0: mov r8, [[ID]] +; CHECK-O0: strb [[ID2]], {{\[}}[[ID]], #8] ; reload from stack ; CHECK-O0: ldr r8 entry: @@ -233,18 +232,18 @@ ; CHECK-APPLE-DAG: str r{{.*}}, [{{.*}}[[SRET]], #4] ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: mov r{{.*}}, #16 +; CHECK-O0-DAG: mov r{{.*}}, #16 ; spill to stack: sret and val1 ; CHECK-O0-DAG: str r0 ; CHECK-O0-DAG: str r1 ; CHECK-O0: malloc -; CHECK-O0: mov [[ID:r[0-9]+]], #1 -; CHECK-O0: strb [[ID]], [r0, #8] ; reload from stack: sret and val1 ; CHECK-O0: ldr ; CHECK-O0: ldr -; CHECK-O0: str r{{.*}}, [{{.*}}, #4] -; CHECK-O0: mov r8 +; CHECK-O0-DAG: mov r8 +; CHECK-O0-DAG: mov [[ID:r[0-9]+]], #1 +; CHECK-O0-DAG: strb [[ID]], [{{r[0-9]+}}, #8] +; CHECK-O0-DAG: str r{{.*}}, [{{.*}}, #4] entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -271,16 +270,15 @@ ; CHECK-O0-LABEL: caller3: ; CHECK-O0-DAG: mov r8, #0 -; CHECK-O0-DAG: mov r0 ; CHECK-O0-DAG: mov r1 ; CHECK-O0: bl {{.*}}foo_sret ; CHECK-O0: mov [[ID2:r[0-9]+]], r8 -; CHECK-O0: cmp r8 ; CHECK-O0: str [[ID2]], [sp[[SLOT:.*]]] +; CHECK-O0: cmp r8 ; CHECK-O0: bne ; Access part of the error object and save it to error_ref -; CHECK-O0: ldrb [[CODE:r[0-9]+]] ; CHECK-O0: ldr [[ID:r[0-9]+]] +; CHECK-O0: ldrb [[CODE:r[0-9]+]] ; CHECK-O0: strb [[CODE]], [{{.*}}[[ID]]] ; CHECK-O0: ldr r0, [sp[[SLOT]] ; CHECK-O0: bl {{.*}}free Index: llvm/test/CodeGen/ARM/thumb-big-stack.ll =================================================================== --- llvm/test/CodeGen/ARM/thumb-big-stack.ll 
+++ llvm/test/CodeGen/ARM/thumb-big-stack.ll @@ -12,7 +12,7 @@ ; CHECK: movw [[ADDR:(r[0-9]+|lr)]], # ; CHECK-NEXT: add [[ADDR]], sp ; CHECK-NEXT: vst1.64 {d{{[0-9]+}}, d{{[0-9]+}}}, {{\[}}[[ADDR]]:128] -define <4 x float> @f(<4 x float> %x, float %val) { +define <4 x float> @f(<4 x float> %x) { entry: %.compoundliteral7837 = alloca <4 x float>, align 16 %.compoundliteral7865 = alloca <4 x float>, align 16 @@ -143,9 +143,9 @@ %.compoundliteral13969 = alloca <4 x float>, align 16 %.compoundliteral13983 = alloca <4 x float>, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -153,17 +153,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add68 = fadd <4 x float> %tmp1, %tmp tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add68, <4 x float>* undef, align 16 + store <4 x float> %add68, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp2 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add76 = fadd float %val, 0x4074C999A0000000 + %add76 = fadd float undef, 0x4074C999A0000000 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp3 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins77 = insertelement <4 x float> %tmp3, float %add76, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins77, <4 x float>* undef, align 16 + store <4 x float> %vecins77, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp4 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -175,15 +175,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins80 = insertelement <4 x float> %tmp5, float %add79, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins80, <4 x float>* undef, align 16 + store <4 x float> %vecins80, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp6 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add82 = fadd <4 x float> undef, %tmp6 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add82, <4 x float>* undef, align 16 + store <4 x float> %add82, <4 x float>* undef, 
align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp7 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -195,19 +195,19 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins85 = insertelement <4 x float> %tmp8, float %add84, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins85, <4 x float>* undef, align 16 + store <4 x float> %vecins85, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp9 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext86 = extractelement <4 x float> %tmp9, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add93 = fadd float %val, 0xC076C66660000000 + %add93 = fadd float undef, 0xC076C66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp10 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins94 = insertelement <4 x float> %tmp10, float %add93, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp11 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -223,17 +223,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp14 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins102 = insertelement <4 x float> undef, float %val, i32 1 + %vecins102 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins102, <4 x float>* undef, align 16 + store <4 x float> %vecins102, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp15 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add104 = fadd float %val, 0x406AB999A0000000 + %add104 = fadd float undef, 0x406AB999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp16 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext579 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -243,7 +243,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins581 = insertelement <4 x float> %tmp17, float %add580, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins581, <4 x float>* undef, align 16 + store <4 x float> %vecins581, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp18 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -251,7 +251,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add583 = fadd float %vecext582, 0x40444CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp19 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -261,25 +261,25 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins592 = insertelement <4 x float> undef, float %add591, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins592, <4 x float>* undef, align 16 + store <4 x float> %vecins592, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp20 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add594 = fadd float %val, 0xC05B466660000000 + %add594 = fadd float undef, 0xC05B466660000000 tail call void 
asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add605 = fadd float %val, 0x407164CCC0000000 + %add605 = fadd float undef, 0x407164CCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp21 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add616 = fadd float %val, 1.885000e+02 + %add616 = fadd float undef, 1.885000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp22 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp23 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins620 = insertelement <4 x float> undef, float %val, i32 1 + %vecins620 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins620, <4 x float>* undef, align 16 + store <4 x float> %vecins620, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext621 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -287,7 +287,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins623 = insertelement <4 x float> undef, float %add622, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins623, <4 x 
float>* undef, align 16 + store <4 x float> %vecins623, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp24 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -299,9 +299,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins626 = insertelement <4 x float> %tmp25, float %add625, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins626, <4 x float>* undef, align 16 + store <4 x float> %vecins626, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp26 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -309,7 +309,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add628 = fadd <4 x float> %tmp27, %tmp26 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add628, <4 x float>* undef, align 16 + store <4 x float> %add628, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp28 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -321,7 +321,7 @@ tail call void asm sideeffect 
"", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins631 = insertelement <4 x float> %tmp29, float %add630, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins631, <4 x float>* undef, align 16 + store <4 x float> %vecins631, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp30 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -333,7 +333,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins634 = insertelement <4 x float> %tmp31, float %add633, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins634, <4 x float>* undef, align 16 + store <4 x float> %vecins634, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp32 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -347,13 +347,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp35 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add658 = fadd float %val, 0xC04A4CCCC0000000 + %add658 = fadd float undef, 0xC04A4CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext663 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp36 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins665 = insertelement <4 x float> %tmp36, float %val, i32 2 + %vecins665 = insertelement <4 x float> %tmp36, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext694 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -363,31 +363,31 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins696 = insertelement <4 x float> %tmp37, float %add695, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins696, <4 x float>* undef, align 16 + store <4 x float> %vecins696, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp38 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext699 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add703 = fadd float %val, 
0x4068F33340000000 + %add703 = fadd float undef, 0x4068F33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins704 = insertelement <4 x float> undef, float %add703, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins704, <4 x float>* undef, align 16 + store <4 x float> %vecins704, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp39 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp40 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins710 = insertelement <4 x float> %tmp40, float %val, i32 3 + %vecins710 = insertelement <4 x float> %tmp40, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins710, <4 x float>* undef, align 16 + store <4 x float> %vecins710, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp41 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -395,7 +395,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add712 = fadd <4 x float> %tmp42, %tmp41 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add712, <4 x float>* undef, align 16 + store <4 x float> %add712, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp43 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -403,7 +403,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp44 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins715 = insertelement <4 x float> %tmp44, float %val, i32 0 + %vecins715 = insertelement <4 x float> %tmp44, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp45 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -415,19 +415,19 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins718 = insertelement <4 x float> %tmp46, float %add717, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins718, <4 x float>* undef, align 16 + store <4 x float> %vecins718, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp47 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext719 = extractelement <4 x float> %tmp47, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add723 = fadd float %val, 0xC06A6CCCC0000000 + %add723 = fadd float undef, 0xC06A6CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins724 = insertelement <4 x float> undef, float %add723, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add726 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext730 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -437,19 +437,19 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins732 = insertelement <4 x float> %tmp48, float %add731, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins732, <4 x float>* undef, align 16 + store <4 x float> %vecins732, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp49 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext733 = extractelement <4 x float> %tmp49, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, 
align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp50 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins738 = insertelement <4 x float> %tmp50, float %val, i32 3 + %vecins738 = insertelement <4 x float> %tmp50, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp51 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -465,7 +465,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins743 = insertelement <4 x float> %tmp53, float %add742, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins743, <4 x float>* undef, align 16 + store <4 x float> %vecins743, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp54 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -473,7 +473,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add754 = fadd <4 x float> %tmp55, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add754, <4 x float>* undef, align 16 + store <4 x float> %add754, <4 x float>* undef, 
align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp56 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -485,7 +485,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins757 = insertelement <4 x float> %tmp57, float %add756, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add765 = fadd float %val, 0x405BA66660000000 + %add765 = fadd float undef, 0x405BA66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp58 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -501,11 +501,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins771 = insertelement <4 x float> %tmp60, float %add770, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins771, <4 x float>* undef, align 16 + store <4 x float> %vecins771, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp61 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add776 = fadd float %val, 0xC055F33340000000 + %add776 = fadd float undef, 0xC055F33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins777 = insertelement <4 x float> undef, float %add776, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -515,7 +515,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add782 = fadd <4 x float> %tmp63, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add782, <4 x float>* undef, align 16 + store <4 x float> %add782, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp64 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -523,25 +523,25 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add784 = fadd float %vecext783, -3.455000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add796 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add796, <4 x float>* undef, align 16 + store <4 x float> %add796, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp65 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add801 = fadd float %val, 3.045000e+02 + %add801 = fadd float undef, 3.045000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() 
%tmp66 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins802 = insertelement <4 x float> %tmp66, float %add801, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins802, <4 x float>* undef, align 16 + store <4 x float> %vecins802, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext803 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp67 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -549,7 +549,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add810 = fadd <4 x float> undef, %tmp68 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add810, <4 x float>* undef, align 16 + store <4 x float> %add810, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp69 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -557,17 +557,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp70 = load <4 x float>, <4 x float>* 
undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins813 = insertelement <4 x float> %tmp70, float %val, i32 0 + %vecins813 = insertelement <4 x float> %tmp70, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext817 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add818 = fadd float %vecext817, -4.830000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins822 = insertelement <4 x float> undef, float %val, i32 3 + %vecins822 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins822, <4 x float>* undef, align 16 + store <4 x float> %vecins822, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp71 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -577,17 +577,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add838 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add838, <4 x float>* undef, align 16 + store <4 x float> %add838, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp73 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext839 = extractelement <4 x float> %tmp73, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add849 = fadd float %val, 0xC07C266660000000 + %add849 = fadd float undef, 0xC07C266660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp74 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -609,9 +609,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins861 = insertelement <4 x float> %tmp77, float %add860, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins889 = insertelement <4 x float> undef, float %val, i32 2 + %vecins889 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins889, <4 x float>* undef, align 16 + store <4 x float> %vecins889, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp78 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -623,9 +623,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins892 = insertelement <4 x float> %tmp79, float %add891, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins892, <4 x float>* undef, align 16 + store <4 x float> %vecins892, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp80 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -633,7 +633,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add894 = fadd <4 x float> %tmp81, %tmp80 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add894, <4 x float>* undef, align 16 + store <4 x float> %add894, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext895 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -659,7 +659,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins903 = insertelement <4 x float> 
%tmp84, float %add902, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins903, <4 x float>* undef, align 16 + store <4 x float> %vecins903, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext904 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -669,7 +669,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins906 = insertelement <4 x float> %tmp85, float %add905, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp86 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -677,13 +677,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add908 = fadd <4 x float> %tmp87, %tmp86 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add908, <4 x float>* undef, align 16 + store <4 x float> %add908, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp88 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp89 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp90 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -703,7 +703,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins917 = insertelement <4 x float> %tmp92, float %add916, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins917, <4 x float>* undef, align 16 + store <4 x float> %vecins917, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp93 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -715,17 +715,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins920 = insertelement <4 x float> %tmp94, float %add919, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins920, <4 x float>* undef, align 16 + store <4 x float> %vecins920, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp95 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins925 = insertelement <4 x float> %tmp95, float %val, i32 0 + %vecins925 = 
insertelement <4 x float> %tmp95, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins925, <4 x float>* undef, align 16 + store <4 x float> %vecins925, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp96 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add927 = fadd float %val, 0xC0501999A0000000 + %add927 = fadd float undef, 0xC0501999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp97 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -739,7 +739,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins931 = insertelement <4 x float> %tmp98, float %add930, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp99 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -747,11 +747,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext937 = extractelement <4 x float> %tmp100, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add941 = fadd float %val, -4.665000e+02 + %add941 = fadd float undef, 
-4.665000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins942 = insertelement <4 x float> undef, float %add941, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins942, <4 x float>* undef, align 16 + store <4 x float> %vecins942, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp101 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -763,29 +763,29 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins945 = insertelement <4 x float> %tmp102, float %add944, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins945, <4 x float>* undef, align 16 + store <4 x float> %vecins945, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp103 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add947 = fadd float %val, 0xC051933340000000 + %add947 = fadd float undef, 0xC051933340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp104 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins948 = insertelement <4 x float> %tmp104, float %add947, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile 
<4 x float> %vecins948, <4 x float>* undef, align 16 + store <4 x float> %vecins948, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp105 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add955 = fadd float %val, 0x4077F4CCC0000000 + %add955 = fadd float undef, 0x4077F4CCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp106 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins956 = insertelement <4 x float> %tmp106, float %add955, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins956, <4 x float>* undef, align 16 + store <4 x float> %vecins956, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext971 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -795,17 +795,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins973 = insertelement <4 x float> %tmp107, float %add972, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins973, <4 x float>* undef, align 16 + store <4 x float> %vecins973, <4 x float>* undef, align 16 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp108 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext974 = extractelement <4 x float> %tmp108, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins976 = insertelement <4 x float> undef, float %val, i32 3 + %vecins976 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins976, <4 x float>* undef, align 16 + store <4 x float> %vecins976, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp109 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -817,7 +817,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp112 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext982 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -825,7 +825,7 @@ tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins984 = insertelement <4 x float> undef, float %add983, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins984, <4 x float>* undef, align 16 + store <4 x float> %vecins984, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp113 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -837,25 +837,25 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins987 = insertelement <4 x float> %tmp114, float %add986, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins987, <4 x float>* undef, align 16 + store <4 x float> %vecins987, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp115 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp116 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins995 = insertelement <4 x float> %tmp116, float %val, i32 0 + %vecins995 = insertelement <4 x float> %tmp116, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins995, <4 x float>* undef, align 16 + store <4 x float> %vecins995, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp117 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add997 = fadd float %val, 0xC0798999A0000000 + %add997 = fadd float undef, 0xC0798999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp118 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins998 = insertelement <4 x float> %tmp118, float %add997, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins998, <4 x float>* undef, align 16 + store <4 x float> %vecins998, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp119 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -865,7 +865,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp120 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp121 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -879,13 +879,13 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1031 = fadd float %vecext1030, 2.010000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp123 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp124 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1085 = insertelement <4 x float> %tmp124, float %val, i32 2 + %vecins1085 = insertelement <4 x float> %tmp124, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp125 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -897,13 +897,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1088 = insertelement <4 x float> %tmp126, float %add1087, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1088, <4 x float>* undef, align 16 + store <4 x float> %vecins1088, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp127 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1090 = fadd <4 x float> undef, %tmp127 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp128 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -915,7 +915,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1096 = insertelement <4 x float> %tmp129, float %add1095, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1096, <4 x float>* undef, align 16 + store <4 x float> %vecins1096, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp130 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -927,7 +927,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1099 = insertelement <4 x float> %tmp131, float %add1098, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1099, <4 x float>* undef, align 16 + store <4 x float> %vecins1099, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp132 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -939,9 +939,9 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1102 = insertelement <4 x float> %tmp133, float %add1101, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1102, <4 x float>* undef, align 16 + store <4 x float> %vecins1102, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp134 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -961,9 +961,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp137 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1110 = insertelement <4 x float> %tmp137, float %val, i32 1 + %vecins1110 = insertelement <4 x float> %tmp137, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1110, <4 x float>* undef, align 16 + store <4 x float> %vecins1110, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp138 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -975,21 +975,21 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1113 = insertelement <4 x float> %tmp139, 
float %add1112, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1113, <4 x float>* undef, align 16 + store <4 x float> %vecins1113, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1115 = fadd float %val, 0x4072B33340000000 + %add1115 = fadd float undef, 0x4072B33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1116 = insertelement <4 x float> undef, float %add1115, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1116, <4 x float>* undef, align 16 + store <4 x float> %vecins1116, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp140 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1118 = fadd <4 x float> %tmp140, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1118, <4 x float>* undef, align 16 + store <4 x float> %add1118, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp141 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -999,7 +999,7 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1121 = insertelement <4 x float> undef, float %add1120, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1121, <4 x float>* undef, align 16 + store <4 x float> %vecins1121, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp142 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1013,9 +1013,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1125 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1127 = insertelement <4 x float> undef, float %val, i32 2 + %vecins1127 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1127, <4 x float>* undef, align 16 + store <4 x float> %vecins1127, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp144 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1027,7 +1027,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1130 = insertelement <4 x float> %tmp145, float %add1129, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> 
, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp146 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1045,7 +1045,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1135 = insertelement <4 x float> %tmp149, float %add1134, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1135, <4 x float>* undef, align 16 + store <4 x float> %vecins1135, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp150 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1053,13 +1053,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp151 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1138 = insertelement <4 x float> %tmp151, float %val, i32 1 + %vecins1138 = insertelement <4 x float> %tmp151, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1138, <4 x float>* undef, align 16 + store <4 x float> %vecins1138, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp152 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1140 = fadd float 
%val, 0x407AE999A0000000 + %add1140 = fadd float undef, 0x407AE999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp153 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1073,7 +1073,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1144 = insertelement <4 x float> %tmp154, float %add1143, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1144, <4 x float>* undef, align 16 + store <4 x float> %vecins1144, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp155 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1081,27 +1081,27 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1146 = fadd <4 x float> %tmp156, %tmp155 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1146, <4 x float>* undef, align 16 + store <4 x float> %add1146, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp157 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1148 = fadd float %val, 4.145000e+02 + %add1148 = fadd float undef, 4.145000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp158 = load <4 x float>, <4 x float>* 
undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1158 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1158 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1158, <4 x float>* undef, align 16 + store <4 x float> %vecins1158, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1218 = fadd float %val, 0xC078733340000000 + %add1218 = fadd float undef, 0xC078733340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1219 = insertelement <4 x float> undef, float %add1218, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp159 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1113,7 +1113,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1281 = insertelement <4 x float> %tmp160, float %add1280, 
i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1281, <4 x float>* undef, align 16 + store <4 x float> %vecins1281, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp161 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1125,7 +1125,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1284 = insertelement <4 x float> %tmp162, float %add1283, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1284, <4 x float>* undef, align 16 + store <4 x float> %vecins1284, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp163 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1133,27 +1133,27 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1286 = fadd <4 x float> %tmp164, %tmp163 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1286, <4 x float>* undef, align 16 + store <4 x float> %add1286, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp165 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1288 = fadd float %val, 0xC0731199A0000000 + %add1288 = fadd 
float undef, 0xC0731199A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp166 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp167 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1444 = extractelement <4 x float> %tmp167, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1460 = insertelement <4 x float> undef, float %val, i32 1 + %vecins1460 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1460, <4 x float>* undef, align 16 + store <4 x float> %vecins1460, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp168 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1462 = fadd float %val, -1.670000e+02 + %add1462 = fadd float undef, -1.670000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1463 = insertelement <4 x float> undef, float %add1462, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1167,9 +1167,9 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1466 = insertelement <4 x float> %tmp170, float %add1465, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1466, <4 x float>* undef, align 16 + store <4 x float> %vecins1466, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp171 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1177,17 +1177,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1468 = fadd <4 x float> %tmp172, %tmp171 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1468, <4 x float>* undef, align 16 + store <4 x float> %add1468, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp173 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1470 = fadd float %val, 0x4033B33340000000 + %add1470 = fadd float undef, 0x4033B33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp174 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1471 = insertelement <4 x float> %tmp174, float %add1470, i32 0 
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1471, <4 x float>* undef, align 16 + store <4 x float> %vecins1471, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp175 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1205,9 +1205,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp178 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1477 = insertelement <4 x float> %tmp178, float %val, i32 2 + %vecins1477 = insertelement <4 x float> %tmp178, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1477, <4 x float>* undef, align 16 + store <4 x float> %vecins1477, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp179 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1219,15 +1219,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1480 = insertelement <4 x float> %tmp180, float %add1479, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1480, <4 x float>* undef, align 16 + store <4 x float> %vecins1480, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp181 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp182 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp183 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1241,9 +1241,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1486 = extractelement <4 x float> %tmp185, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1502 = insertelement <4 x float> undef, float %val, i32 1 + %vecins1502 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1502, <4 x float>* undef, align 16 + store <4 x float> %vecins1502, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1503 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1253,7 +1253,7 @@ tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1505 = insertelement <4 x float> %tmp186, float %add1504, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1505, <4 x float>* undef, align 16 + store <4 x float> %vecins1505, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp187 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1265,9 +1265,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1508 = insertelement <4 x float> %tmp188, float %add1507, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1508, <4 x float>* undef, align 16 + store <4 x float> %vecins1508, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp189 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1275,7 +1275,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1510 = fadd <4 x float> %tmp190, %tmp189 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1510, <4 x float>* undef, align 16 + store <4 x float> %add1510, <4 x float>* 
undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp191 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1289,13 +1289,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1656 = insertelement <4 x float> %tmp193, float %add1655, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1656, <4 x float>* undef, align 16 + store <4 x float> %vecins1656, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1658 = fadd float %val, 0x40709999A0000000 + %add1658 = fadd float undef, 0x40709999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp194 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1660 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1305,19 +1305,19 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1662 = insertelement <4 x float> %tmp195, float %add1661, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1662, <4 x float>* undef, 
align 16 + store <4 x float> %vecins1662, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1676 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1676 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp196 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1692 = fadd <4 x float> %tmp196, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1692, <4 x float>* undef, align 16 + store <4 x float> %add1692, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp197 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1329,7 +1329,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1695 = insertelement <4 x float> %tmp198, float %add1694, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1695, <4 x float>* undef, align 16 + store <4 x float> %vecins1695, 
<4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp199 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1341,7 +1341,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1698 = insertelement <4 x float> %tmp200, float %add1697, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1698, <4 x float>* undef, align 16 + store <4 x float> %vecins1698, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp201 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1349,15 +1349,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp202 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1701 = insertelement <4 x float> %tmp202, float %val, i32 2 + %vecins1701 = insertelement <4 x float> %tmp202, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1701, <4 x float>* undef, align 16 + store <4 x float> %vecins1701, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp203 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - 
%vecins1704 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1704 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp204 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1365,9 +1365,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp206 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1709 = insertelement <4 x float> %tmp206, float %val, i32 0 + %vecins1709 = insertelement <4 x float> %tmp206, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1709, <4 x float>* undef, align 16 + store <4 x float> %vecins1709, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp207 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1375,11 +1375,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1714 = fadd float %vecext1713, 0xC0703199A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1723 = insertelement <4 x float> undef, float %val, i32 0 + %vecins1723 = insertelement <4 x float> undef, float undef, i32 0 tail 
call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp208 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1730 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1389,9 +1389,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1732 = insertelement <4 x float> %tmp209, float %add1731, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1732, <4 x float>* undef, align 16 + store <4 x float> %vecins1732, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp210 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1399,7 +1399,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp211 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1736 = fadd float %val, 0x407C3999A0000000 + %add1736 = fadd float undef, 0x407C3999A0000000 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp212 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1415,7 +1415,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1740 = insertelement <4 x float> %tmp214, float %add1739, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1740, <4 x float>* undef, align 16 + store <4 x float> %vecins1740, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp215 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1427,25 +1427,25 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1743 = insertelement <4 x float> %tmp216, float %add1742, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1743, <4 x float>* undef, align 16 + store <4 x float> %vecins1743, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1744 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp217 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1746 = insertelement <4 x float> %tmp217, float %val, i32 3 + %vecins1746 = insertelement <4 x float> %tmp217, float 
undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp218 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1748 = fadd <4 x float> undef, %tmp218 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1748, <4 x float>* undef, align 16 + store <4 x float> %add1748, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp219 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1750 = fadd float %val, 0x407C6B3340000000 + %add1750 = fadd float undef, 0x407C6B3340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1751 = insertelement <4 x float> undef, float %add1750, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1467,21 +1467,21 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp223 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1759 = fadd float %val, 0x40678999A0000000 + %add1759 = fadd float undef, 0x40678999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp224 = load <4 x float>, <4 x float>* undef, align 16 tail 
call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1760 = insertelement <4 x float> %tmp224, float %add1759, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1760, <4 x float>* undef, align 16 + store <4 x float> %vecins1760, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp225 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1762 = fadd <4 x float> undef, %tmp225 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1762, <4 x float>* undef, align 16 + store <4 x float> %add1762, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp226 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1493,7 +1493,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1765 = insertelement <4 x float> %tmp227, float %add1764, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1765, <4 x float>* undef, align 16 + store <4 x float> %vecins1765, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() 
%tmp228 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1505,7 +1505,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1768 = insertelement <4 x float> %tmp229, float %add1767, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1768, <4 x float>* undef, align 16 + store <4 x float> %vecins1768, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1769 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1515,7 +1515,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1771 = insertelement <4 x float> %tmp230, float %add1770, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1771, <4 x float>* undef, align 16 + store <4 x float> %vecins1771, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp231 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1525,13 +1525,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp234 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1779 = insertelement <4 x float> %tmp234, float %val, i32 0 + %vecins1779 = insertelement 
<4 x float> %tmp234, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1779, <4 x float>* undef, align 16 + store <4 x float> %vecins1779, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp235 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp236 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1541,9 +1541,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1785 = insertelement <4 x float> undef, float %add1784, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1785, <4 x float>* undef, align 16 + store <4 x float> %vecins1785, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp237 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1559,25 +1559,25 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1793 = insertelement <4 x float> %tmp239, float %add1792, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1793, <4 x float>* undef, align 16 + store <4 x float> %vecins1793, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp240 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1795 = fadd float %val, 0x4055266660000000 + %add1795 = fadd float undef, 0x4055266660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp241 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1796 = insertelement <4 x float> %tmp241, float %add1795, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1799 = insertelement <4 x float> undef, float %val, i32 2 + %vecins1799 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1800 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp242 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp243 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1587,7 +1587,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp246 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1865 = fadd float %val, -2.235000e+02 + %add1865 = fadd float undef, -2.235000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp247 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1597,33 +1597,33 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp249 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1872 = insertelement <4 x float> %tmp249, float %val, i32 3 + %vecins1872 = insertelement <4 x float> %tmp249, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp250 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1874 = fadd <4 x float> %tmp250, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1874, <4 x float>* undef, align 16 + store <4 x float> %add1874, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1875 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp251 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1894 = insertelement <4 x float> %tmp251, float %val, i32 1 + %vecins1894 = insertelement <4 x float> %tmp251, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp252 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1895 = extractelement <4 x float> %tmp252, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1900 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1900 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1900, <4 x float>* undef, align 16 + store <4 x float> %vecins1900, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1905 = insertelement <4 x float> undef, float %val, i32 0 + %vecins1905 = insertelement <4 x float> undef, float undef, i32 
0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1905, <4 x float>* undef, align 16 + store <4 x float> %vecins1905, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp253 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1633,7 +1633,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1908 = insertelement <4 x float> undef, float %add1907, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1908, <4 x float>* undef, align 16 + store <4 x float> %vecins1908, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1909 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1649,23 +1649,23 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1916 = fadd <4 x float> %tmp256, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add1916, <4 x float>* undef, align 16 + store <4 x float> %add1916, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() 
%vecext1923 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp257 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add1927 = fadd float %val, 0x40761999A0000000 + %add1927 = fadd float undef, 0x40761999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp258 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1928 = insertelement <4 x float> %tmp258, float %add1927, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1928, <4 x float>* undef, align 16 + store <4 x float> %vecins1928, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp259 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1677,9 +1677,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp262 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1933 = insertelement <4 x float> %tmp262, float %val, i32 0 + %vecins1933 = insertelement <4 x float> %tmp262, float undef, i32 0 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1933, <4 x float>* undef, align 16 + store <4 x float> %vecins1933, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp263 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1693,15 +1693,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1940 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1942 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1942 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp265 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp266 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp267 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1709,13 +1709,13 @@ tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add1946 = fadd float %vecext1945, 0xC074866660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1953 = insertelement <4 x float> undef, float %val, i32 2 + %vecins1953 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1953, <4 x float>* undef, align 16 + store <4 x float> %vecins1953, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp268 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp269 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1737,15 +1737,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1964 = insertelement <4 x float> %tmp272, float %add1963, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1964, <4 x float>* undef, align 16 + store <4 x float> %vecins1964, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1965 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp273 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1967 = insertelement <4 x float> %tmp273, float %val, i32 2 + %vecins1967 = insertelement <4 x float> %tmp273, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1967, <4 x float>* undef, align 16 + store <4 x float> %vecins1967, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp274 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1757,9 +1757,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1970 = insertelement <4 x float> %tmp275, float %add1969, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1970, <4 x float>* undef, align 16 + store <4 x float> %vecins1970, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp276 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1767,31 +1767,31 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp278 = load <4 x float>, <4 x 
float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1975 = insertelement <4 x float> %tmp278, float %val, i32 0 + %vecins1975 = insertelement <4 x float> %tmp278, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1975, <4 x float>* undef, align 16 + store <4 x float> %vecins1975, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp279 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1976 = extractelement <4 x float> %tmp279, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1978 = insertelement <4 x float> undef, float %val, i32 1 + %vecins1978 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1978, <4 x float>* undef, align 16 + store <4 x float> %vecins1978, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1979 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1981 = insertelement <4 x float> undef, float %val, i32 2 + %vecins1981 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1981, <4 x float>* undef, align 16 + store <4 x float> %vecins1981, <4 x float>* undef, align 16 tail call 
void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins1984 = insertelement <4 x float> undef, float %val, i32 3 + %vecins1984 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1984, <4 x float>* undef, align 16 + store <4 x float> %vecins1984, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext1990 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1803,11 +1803,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins1998 = insertelement <4 x float> %tmp280, float %add1997, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins1998, <4 x float>* undef, align 16 + store <4 x float> %vecins1998, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, 
align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2004 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1817,7 +1817,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2006 = insertelement <4 x float> %tmp281, float %add2005, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2006, <4 x float>* undef, align 16 + store <4 x float> %vecins2006, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp282 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1825,7 +1825,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp283 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2009 = insertelement <4 x float> %tmp283, float %val, i32 2 + %vecins2009 = insertelement <4 x float> %tmp283, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp284 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1837,15 +1837,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2012 = insertelement <4 x float> %tmp285, float %add2011, i32 3 tail call void 
asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2012, <4 x float>* undef, align 16 + store <4 x float> %vecins2012, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp286 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp287 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp288 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1857,7 +1857,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2017 = insertelement <4 x float> %tmp289, float %add2016, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2022 = fadd float %val, 8.350000e+01 + %add2022 = fadd float undef, 8.350000e+01 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp290 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1871,7 +1871,7 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2028 = fadd <4 x float> %tmp292, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add2028, <4 x float>* undef, align 16 + store <4 x float> %add2028, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2029 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1879,11 +1879,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp293 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp294 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2036 = fadd float %val, 0x407DE66660000000 + %add2036 = fadd float undef, 0x407DE66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp295 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1895,9 +1895,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp299 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2045 = insertelement <4 x float> %tmp299, float %val, i32 0 + %vecins2045 = insertelement <4 x float> %tmp299, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2045, <4 x float>* undef, align 16 + store <4 x float> %vecins2045, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp300 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1905,35 +1905,35 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2047 = fadd float %vecext2046, 0xC065433340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2052 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp301 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2054 = insertelement <4 x float> %tmp301, float %val, i32 3 + %vecins2054 = insertelement <4 x float> %tmp301, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2054, <4 x float>* undef, align 16 + store <4 x float> %vecins2054, <4 x float>* undef, align 16 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp302 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2056 = fadd <4 x float> undef, %tmp302 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add2056, <4 x float>* undef, align 16 + store <4 x float> %add2056, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp303 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp304 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2062 = insertelement <4 x float> %tmp304, float %val, i32 1 + %vecins2062 = insertelement <4 x float> %tmp304, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2062, <4 x float>* undef, align 16 + store <4 x float> %vecins2062, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp305 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp306 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1943,9 +1943,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2068 = insertelement <4 x float> undef, float %add2067, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2068, <4 x float>* undef, align 16 + store <4 x float> %vecins2068, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp307 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1953,7 +1953,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2070 = fadd <4 x float> %tmp308, %tmp307 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add2070, <4 x float>* undef, align 16 + store <4 x float> %add2070, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp309 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1965,7 +1965,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2073 = 
insertelement <4 x float> %tmp310, float %add2072, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2073, <4 x float>* undef, align 16 + store <4 x float> %vecins2073, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp311 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1973,7 +1973,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp312 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2076 = insertelement <4 x float> %tmp312, float %val, i32 1 + %vecins2076 = insertelement <4 x float> %tmp312, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp313 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1985,7 +1985,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2079 = insertelement <4 x float> %tmp314, float %add2078, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2079, <4 x float>* undef, align 16 + store <4 x float> %vecins2079, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp315 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -1997,15 +1997,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2082 = insertelement <4 x float> %tmp316, float %add2081, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2082, <4 x float>* undef, align 16 + store <4 x float> %vecins2082, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp317 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp318 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp319 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2015,7 +2015,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2087 = insertelement <4 x float> undef, float %add2086, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2087, <4 x float>* undef, align 16 + store <4 x float> %vecins2087, <4 x float>* undef, align 
16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2480 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2029,23 +2029,23 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2485 = insertelement <4 x float> %tmp320, float %add2484, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2485, <4 x float>* undef, align 16 + store <4 x float> %vecins2485, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp321 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2487 = fadd float %val, 2.030000e+02 + %add2487 = fadd float undef, 2.030000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp322 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2491 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp323 = load <4 x float>, <4 x 
float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp324 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2055,9 +2055,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp325 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2499 = insertelement <4 x float> undef, float %val, i32 2 + %vecins2499 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2499, <4 x float>* undef, align 16 + store <4 x float> %vecins2499, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2500 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2079,7 +2079,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp329 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2534 = fadd float %val, 0x4072C66660000000 + %add2534 = fadd float undef, 0x4072C66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() 
%vecext2536 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2089,15 +2089,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2538 = insertelement <4 x float> %tmp330, float %add2537, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2538, <4 x float>* undef, align 16 + store <4 x float> %vecins2538, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2539 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2540 = fadd float %vecext2539, 0x406F9999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2580 = insertelement <4 x float> undef, float %val, i32 1 + %vecins2580 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2580, <4 x float>* undef, align 16 + store <4 x float> %vecins2580, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp331 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2107,7 +2107,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2583 = insertelement <4 x float> undef, float %add2582, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2583, <4 x float>* undef, align 16 + store <4 x float> %vecins2583, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2584 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2115,21 +2115,21 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp332 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2590 = fadd float %val, 0x407B1999A0000000 + %add2590 = fadd float undef, 0x407B1999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp333 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp334 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2672 
= fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add2672, <4 x float>* undef, align 16 + store <4 x float> %add2672, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp335 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2141,37 +2141,37 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2678 = insertelement <4 x float> %tmp336, float %add2677, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2678, <4 x float>* undef, align 16 + store <4 x float> %vecins2678, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp337 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2679 = extractelement <4 x float> %tmp337, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2681 = insertelement <4 x float> undef, float %val, i32 2 + %vecins2681 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2681, <4 x float>* undef, align 16 + store <4 x float> %vecins2681, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp338 = load <4 x float>, <4 x float>* undef, align 16 tail call void 
asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext2682 = extractelement <4 x float> %tmp338, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2684 = insertelement <4 x float> undef, float %val, i32 3 + %vecins2684 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp339 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp340 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp341 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add2688 = fadd float %val, 0x4063266660000000 + %add2688 = fadd float undef, 0x4063266660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins2692 = insertelement <4 x float> undef, float %val, i32 1 + %vecins2692 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2692, <4 x float>* undef, align 16 + store <4 x 
float> %vecins2692, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp342 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2183,9 +2183,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins2698 = insertelement <4 x float> %tmp343, float %add2697, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins2698, <4 x float>* undef, align 16 + store <4 x float> %vecins2698, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp344 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2193,7 +2193,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add2700 = fadd <4 x float> %tmp345, %tmp344 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add2700, <4 x float>* undef, align 16 + store <4 x float> %add2700, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp346 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2207,15 +2207,15 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp349 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3121 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3125 = fadd float %val, 0xC06F266660000000 + %add3125 = fadd float undef, 0xC06F266660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3126 = insertelement <4 x float> undef, float %add3125, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3126, <4 x float>* undef, align 16 + store <4 x float> %vecins3126, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp350 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2227,11 +2227,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3129 = insertelement <4 x float> %tmp351, float %add3128, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3129, <4 x float>* undef, align 16 + store <4 x float> %vecins3129, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() 
%tmp352 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3131 = fadd float %val, 3.215000e+02 + %add3131 = fadd float undef, 3.215000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp353 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2239,15 +2239,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3134 = fadd <4 x float> %tmp354, %tmp353 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3134, <4 x float>* undef, align 16 + store <4 x float> %add3134, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp355 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3136 = fadd float %val, 0x4074333340000000 + %add3136 = fadd float undef, 0x4074333340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3140 = insertelement <4 x float> undef, float %val, i32 1 + %vecins3140 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3140, <4 x float>* undef, align 16 + store <4 x float> %vecins3140, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp356 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect 
"", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2259,7 +2259,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3143 = insertelement <4 x float> %tmp357, float %add3142, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3143, <4 x float>* undef, align 16 + store <4 x float> %vecins3143, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp358 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2271,15 +2271,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3146 = insertelement <4 x float> %tmp359, float %add3145, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3146, <4 x float>* undef, align 16 + store <4 x float> %vecins3146, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp360 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3272 = insertelement <4 x float> undef, float %val, i32 3 + %vecins3272 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3272, <4 x float>* undef, align 16 + store <4 x float> %vecins3272, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp361 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2287,7 +2287,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3274 = fadd <4 x float> %tmp362, %tmp361 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3274, <4 x float>* undef, align 16 + store <4 x float> %add3274, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp363 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2299,7 +2299,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3277 = insertelement <4 x float> %tmp364, float %add3276, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3277, <4 x float>* undef, align 16 + store <4 x float> %vecins3277, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp365 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2309,7 +2309,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3280 = 
insertelement <4 x float> undef, float %add3279, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3280, <4 x float>* undef, align 16 + store <4 x float> %vecins3280, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp366 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2321,7 +2321,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3283 = insertelement <4 x float> %tmp367, float %add3282, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3283, <4 x float>* undef, align 16 + store <4 x float> %vecins3283, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp368 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2333,7 +2333,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp369 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp370 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2345,7 
+2345,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3291 = insertelement <4 x float> %tmp371, float %add3290, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3291, <4 x float>* undef, align 16 + store <4 x float> %vecins3291, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3292 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2353,11 +2353,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp373 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3328 = insertelement <4 x float> %tmp373, float %val, i32 3 + %vecins3328 = insertelement <4 x float> %tmp373, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3330 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3330, <4 x float>* undef, align 16 + store <4 x float> %add3330, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3331 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2367,7 +2367,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3333 = insertelement <4 x float> %tmp374, float 
%add3332, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3333, <4 x float>* undef, align 16 + store <4 x float> %vecins3333, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3334 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2385,7 +2385,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3339 = insertelement <4 x float> %tmp376, float %add3338, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3339, <4 x float>* undef, align 16 + store <4 x float> %vecins3339, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp377 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2393,13 +2393,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp378 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3342 = insertelement <4 x float> %tmp378, float %val, i32 3 + %vecins3342 = insertelement <4 x float> %tmp378, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp379 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3344 = fadd <4 x 
float> %tmp379, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3344, <4 x float>* undef, align 16 + store <4 x float> %add3344, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp380 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2419,15 +2419,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3350 = insertelement <4 x float> %tmp382, float %add3349, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3350, <4 x float>* undef, align 16 + store <4 x float> %vecins3350, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3352 = fadd float %val, 0xC06ACCCCC0000000 + %add3352 = fadd float undef, 0xC06ACCCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp383 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3423 = insertelement <4 x float> undef, float %val, i32 2 + %vecins3423 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3423, <4 x float>* undef, align 16 + store <4 x float> %vecins3423, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3424 = extractelement <4 x float> 
undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2437,9 +2437,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3426 = insertelement <4 x float> %tmp384, float %add3425, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3426, <4 x float>* undef, align 16 + store <4 x float> %vecins3426, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp385 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2457,7 +2457,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3431 = insertelement <4 x float> %tmp388, float %add3430, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3431, <4 x float>* undef, align 16 + store <4 x float> %vecins3431, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp389 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2469,15 +2469,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3434 = insertelement <4 x float> %tmp390, float %add3433, i32 1 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3434, <4 x float>* undef, align 16 + store <4 x float> %vecins3434, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3435 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp391 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3437 = insertelement <4 x float> %tmp391, float %val, i32 2 + %vecins3437 = insertelement <4 x float> %tmp391, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3437, <4 x float>* undef, align 16 + store <4 x float> %vecins3437, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp392 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2485,7 +2485,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3439 = fadd float %vecext3438, 0xC071D999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp393 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2493,7 +2493,7 @@ tail call void 
asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3442 = fadd <4 x float> %tmp394, %tmp393 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3442, <4 x float>* undef, align 16 + store <4 x float> %add3442, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3443 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2509,7 +2509,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3448 = insertelement <4 x float> %tmp396, float %add3447, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3448, <4 x float>* undef, align 16 + store <4 x float> %vecins3448, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp397 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2521,15 +2521,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3451 = insertelement <4 x float> %tmp398, float %add3450, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3451, <4 x float>* undef, align 16 + store <4 x float> %vecins3451, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3453 = fadd float %val, 0xC07ADCCCC0000000 + %add3453 = fadd 
float undef, 0xC07ADCCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp399 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3454 = insertelement <4 x float> %tmp399, float %add3453, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3454, <4 x float>* undef, align 16 + store <4 x float> %vecins3454, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp400 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2539,7 +2539,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3459 = insertelement <4 x float> undef, float %add3458, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3459, <4 x float>* undef, align 16 + store <4 x float> %vecins3459, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp401 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2547,19 +2547,19 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp402 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3462 = insertelement <4 x float> %tmp402, float %val, i32 1 + 
%vecins3462 = insertelement <4 x float> %tmp402, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3462, <4 x float>* undef, align 16 + store <4 x float> %vecins3462, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp403 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3464 = fadd float %val, 0xC057B999A0000000 + %add3464 = fadd float undef, 0xC057B999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp404 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3465 = insertelement <4 x float> %tmp404, float %add3464, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3465, <4 x float>* undef, align 16 + store <4 x float> %vecins3465, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp405 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2569,21 +2569,21 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp406 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp407 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp408 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3477 = extractelement <4 x float> %tmp408, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins3479 = insertelement <4 x float> undef, float %val, i32 2 + %vecins3479 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3479, <4 x float>* undef, align 16 + store <4 x float> %vecins3479, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3480 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2593,23 +2593,23 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3482 = insertelement <4 x float> %tmp409, float %add3481, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store 
volatile <4 x float> %vecins3482, <4 x float>* undef, align 16 + store <4 x float> %vecins3482, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp410 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3484 = fadd <4 x float> %tmp410, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3484, <4 x float>* undef, align 16 + store <4 x float> %add3484, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp411 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3486 = fadd float %val, -1.415000e+02 + %add3486 = fadd float undef, -1.415000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3487 = insertelement <4 x float> undef, float %add3486, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3487, <4 x float>* undef, align 16 + store <4 x float> %vecins3487, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp412 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2621,25 +2621,25 @@ tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3490 = insertelement <4 x float> %tmp413, float %add3489, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3490, <4 x float>* undef, align 16 + store <4 x float> %vecins3490, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3492 = fadd float %val, 0x4078066660000000 + %add3492 = fadd float undef, 0x4078066660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp414 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3493 = insertelement <4 x float> %tmp414, float %add3492, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3493, <4 x float>* undef, align 16 + store <4 x float> %vecins3493, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp415 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3495 = fadd float %val, 0xC0798999A0000000 + %add3495 = fadd float undef, 0xC0798999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp416 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3496 = insertelement <4 x float> %tmp416, float %add3495, i32 3 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3496, <4 x float>* undef, align 16 + store <4 x float> %vecins3496, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp417 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2647,7 +2647,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add3498 = fadd <4 x float> %tmp418, %tmp417 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add3498, <4 x float>* undef, align 16 + store <4 x float> %add3498, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3499 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2663,25 +2663,25 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp420 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3506 = fadd float %val, 0xC074DB3340000000 + %add3506 = fadd float undef, 0xC074DB3340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp421 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins3507 = insertelement <4 x float> %tmp421, float %add3506, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins3507, <4 x float>* undef, align 16 + store <4 x float> %vecins3507, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add3509 = fadd float %val, 0xC066033340000000 + %add3509 = fadd float undef, 0xC066033340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp422 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp423 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3513 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2693,9 +2693,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext3516 = extractelement <4 x float> %tmp425, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5414 = 
insertelement <4 x float> undef, float %val, i32 3 + %vecins5414 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5414, <4 x float>* undef, align 16 + store <4 x float> %vecins5414, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp426 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2703,33 +2703,33 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5416 = fadd <4 x float> %tmp427, %tmp426 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5416, <4 x float>* undef, align 16 + store <4 x float> %add5416, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp428 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5418 = fadd float %val, 0xC07ED999A0000000 + %add5418 = fadd float undef, 0xC07ED999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp429 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5419 = insertelement <4 x float> %tmp429, float %add5418, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5624 = insertelement <4 x float> undef, float %val, i32 3 + %vecins5624 = insertelement <4 x float> undef, 
float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5624, <4 x float>* undef, align 16 + store <4 x float> %vecins5624, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5626 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5626, <4 x float>* undef, align 16 + store <4 x float> %add5626, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext5627 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp430 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5629 = insertelement <4 x float> %tmp430, float %val, i32 0 + %vecins5629 = insertelement <4 x float> %tmp430, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5629, <4 x float>* undef, align 16 + store <4 x float> %vecins5629, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp431 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2739,13 +2739,13 @@ tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5632 = insertelement <4 x float> undef, float %add5631, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5632, <4 x float>* undef, align 16 + store <4 x float> %vecins5632, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp432 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5688 = insertelement <4 x float> %tmp432, float %val, i32 1 + %vecins5688 = insertelement <4 x float> %tmp432, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5688, <4 x float>* undef, align 16 + store <4 x float> %vecins5688, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp433 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2753,35 +2753,35 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp434 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5691 = insertelement <4 x float> %tmp434, float %val, i32 2 + %vecins5691 = insertelement <4 x float> %tmp434, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5691, <4 x float>* undef, align 16 + store <4 x float> %vecins5691, <4 x 
float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext5692 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp435 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5696 = fadd <4 x float> undef, %tmp435 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5696, <4 x float>* undef, align 16 + store <4 x float> %add5696, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5701 = fadd float %val, 0x4077D4CCC0000000 + %add5701 = fadd float undef, 0x4077D4CCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp436 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5702 = insertelement <4 x float> %tmp436, float %add5701, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5702, <4 x float>* undef, align 16 + store <4 x float> %vecins5702, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp437 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp438 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5705 = insertelement <4 x float> %tmp438, float %val, i32 2 + %vecins5705 = insertelement <4 x float> %tmp438, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5705, <4 x float>* undef, align 16 + store <4 x float> %vecins5705, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp439 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2793,9 +2793,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5708 = insertelement <4 x float> %tmp440, float %add5707, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5708, <4 x float>* undef, align 16 + store <4 x float> %vecins5708, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp441 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2803,7 +2803,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5710 = fadd <4 x float> %tmp442, %tmp441 tail 
call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5710, <4 x float>* undef, align 16 + store <4 x float> %add5710, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp443 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2815,19 +2815,19 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5713 = insertelement <4 x float> %tmp444, float %add5712, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5713, <4 x float>* undef, align 16 + store <4 x float> %vecins5713, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp445 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp446 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5716 = insertelement <4 x float> %tmp446, float %val, i32 1 + %vecins5716 = insertelement <4 x float> %tmp446, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp447 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5724 = fadd <4 x float> %tmp447, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> 
%add5724, <4 x float>* undef, align 16 + store <4 x float> %add5724, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp448 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2835,21 +2835,21 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp449 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5750 = insertelement <4 x float> %tmp449, float %val, i32 3 + %vecins5750 = insertelement <4 x float> %tmp449, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp450 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5752 = fadd <4 x float> undef, %tmp450 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5754 = fadd float %val, 0xC064033340000000 + %add5754 = fadd float undef, 0xC064033340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp451 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5755 = insertelement <4 x float> %tmp451, float %add5754, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - 
store volatile <4 x float> %vecins5755, <4 x float>* undef, align 16 + store <4 x float> %vecins5755, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp452 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2861,7 +2861,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5758 = insertelement <4 x float> %tmp453, float %add5757, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5758, <4 x float>* undef, align 16 + store <4 x float> %vecins5758, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp454 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2869,9 +2869,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp455 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5761 = insertelement <4 x float> %tmp455, float %val, i32 2 + %vecins5761 = insertelement <4 x float> %tmp455, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5761, <4 x float>* undef, align 16 + store <4 x float> %vecins5761, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp456 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2883,13 +2883,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5764 = insertelement <4 x float> %tmp457, float %add5763, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5764, <4 x float>* undef, align 16 + store <4 x float> %vecins5764, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5766 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5766, <4 x float>* undef, align 16 + store <4 x float> %add5766, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp458 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2901,9 +2901,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5769 = insertelement <4 x float> %tmp459, float %add5768, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5769, <4 x float>* undef, align 16 + store <4 x float> %vecins5769, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5771 = fadd float %val, 
8.000000e+00 + %add5771 = fadd float undef, 8.000000e+00 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp460 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2911,11 +2911,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp461 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5796 = fadd float %val, 0x4058ECCCC0000000 + %add5796 = fadd float undef, 0x4058ECCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5797 = insertelement <4 x float> undef, float %add5796, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5797, <4 x float>* undef, align 16 + store <4 x float> %vecins5797, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp462 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2923,7 +2923,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp463 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5800 = insertelement <4 x float> %tmp463, float %val, i32 1 + %vecins5800 = insertelement <4 x float> %tmp463, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp464 = load <4 
x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2935,7 +2935,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5803 = insertelement <4 x float> %tmp465, float %add5802, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5803, <4 x float>* undef, align 16 + store <4 x float> %vecins5803, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp466 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2947,11 +2947,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5806 = insertelement <4 x float> %tmp467, float %add5805, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5806, <4 x float>* undef, align 16 + store <4 x float> %vecins5806, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp468 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp469 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2961,7 
+2961,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp470 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp471 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2973,9 +2973,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5820 = insertelement <4 x float> %tmp472, float %add5819, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5820, <4 x float>* undef, align 16 + store <4 x float> %vecins5820, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp473 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2983,7 +2983,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5822 = fadd <4 x float> %tmp474, %tmp473 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5822, <4 x float>* undef, align 16 + store <4 x float> %add5822, <4 x float>* 
undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp475 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -2991,7 +2991,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp476 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5825 = insertelement <4 x float> %tmp476, float %val, i32 0 + %vecins5825 = insertelement <4 x float> %tmp476, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp477 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3003,7 +3003,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5828 = insertelement <4 x float> %tmp478, float %add5827, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5828, <4 x float>* undef, align 16 + store <4 x float> %vecins5828, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp479 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3015,19 +3015,19 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5831 = insertelement <4 x float> %tmp480, float %add5830, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp481 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext5837 = extractelement <4 x float> %tmp481, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5839 = insertelement <4 x float> undef, float %val, i32 0 + %vecins5839 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5839, <4 x float>* undef, align 16 + store <4 x float> %vecins5839, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp482 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3035,33 +3035,33 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp483 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5842 = insertelement <4 x float> %tmp483, float %val, i32 1 + %vecins5842 = insertelement <4 x float> %tmp483, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5842, <4 x float>* undef, align 16 + store <4 x float> %vecins5842, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp484 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp485 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5845 = insertelement <4 x float> %tmp485, float %val, i32 2 + %vecins5845 = insertelement <4 x float> %tmp485, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5845, <4 x float>* undef, align 16 + store <4 x float> %vecins5845, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5850 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5850, <4 x float>* undef, align 16 + store <4 x float> %add5850, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp486 = load 
<4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5852 = fadd float %val, 2.985000e+02 + %add5852 = fadd float undef, 2.985000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp487 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5853 = insertelement <4 x float> %tmp487, float %add5852, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5853, <4 x float>* undef, align 16 + store <4 x float> %vecins5853, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp488 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3073,17 +3073,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5856 = insertelement <4 x float> %tmp489, float %add5855, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5856, <4 x float>* undef, align 16 + store <4 x float> %vecins5856, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp490 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5858 = fadd float %val, 0x4071666660000000 + %add5858 = fadd float undef, 0x4071666660000000 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp491 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5859 = insertelement <4 x float> %tmp491, float %add5858, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5859, <4 x float>* undef, align 16 + store <4 x float> %vecins5859, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp492 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3099,19 +3099,19 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5901 = insertelement <4 x float> %tmp494, float %add5900, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5901, <4 x float>* undef, align 16 + store <4 x float> %vecins5901, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add5914 = fadd float %val, 0x40786E6660000000 + %add5914 = fadd float undef, 0x40786E6660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins5918 = insertelement <4 x float> undef, float %val, i32 3 + %vecins5918 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5918, <4 x float>* undef, align 16 + store <4 x float> %vecins5918, <4 x float>* undef, align 16 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5920 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add5920, <4 x float>* undef, align 16 + store <4 x float> %add5920, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add5934 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3121,7 +3121,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp495 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp496 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3131,13 +3131,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins5996 = insertelement <4 x float> undef, float %add5995, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins5996, <4 x float>* undef, align 16 + store <4 x float> %vecins5996, <4 x float>* undef, align 16 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp497 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext5997 = extractelement <4 x float> %tmp497, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp498 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3149,15 +3149,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6002 = insertelement <4 x float> %tmp499, float %add6001, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6002, <4 x float>* undef, align 16 + store <4 x float> %vecins6002, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp500 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6004 = fadd <4 x float> undef, %tmp500 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6004, <4 x float>* undef, align 16 + store <4 x float> %add6004, <4 x 
float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp501 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3165,7 +3165,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp502 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6007 = insertelement <4 x float> %tmp502, float %val, i32 0 + %vecins6007 = insertelement <4 x float> %tmp502, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp503 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3173,9 +3173,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp504 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6024 = insertelement <4 x float> %tmp504, float %val, i32 1 + %vecins6024 = insertelement <4 x float> %tmp504, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6024, <4 x float>* undef, align 16 + store <4 x float> %vecins6024, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp505 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3187,7 +3187,7 @@ 
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6027 = insertelement <4 x float> %tmp506, float %add6026, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6027, <4 x float>* undef, align 16 + store <4 x float> %vecins6027, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6028 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3197,15 +3197,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6030 = insertelement <4 x float> %tmp507, float %add6029, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6030, <4 x float>* undef, align 16 + store <4 x float> %vecins6030, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp508 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp509 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp510 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3213,7 +3213,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp511 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6036 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3221,17 +3221,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6038 = insertelement <4 x float> undef, float %add6037, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6038, <4 x float>* undef, align 16 + store <4 x float> %vecins6038, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp512 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6040 = fadd float %val, 0x4071ECCCC0000000 + %add6040 = fadd float undef, 0x4071ECCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp513 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6041 = insertelement <4 x float> %tmp513, float %add6040, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6041, <4 x float>* undef, align 16 + store <4 x float> %vecins6041, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp514 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3243,9 +3243,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6044 = insertelement <4 x float> %tmp515, float %add6043, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6044, <4 x float>* undef, align 16 + store <4 x float> %vecins6044, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp516 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3253,15 +3253,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6046 = fadd <4 x float> %tmp517, %tmp516 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6046, <4 x float>* undef, align 16 + store <4 x float> %add6046, <4 x float>* undef, align 16 
tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6047 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp518 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6049 = insertelement <4 x float> %tmp518, float %val, i32 0 + %vecins6049 = insertelement <4 x float> %tmp518, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6049, <4 x float>* undef, align 16 + store <4 x float> %vecins6049, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp519 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3269,19 +3269,19 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6051 = fadd float %vecext6050, 0x407E4E6660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6055 = insertelement <4 x float> undef, float %val, i32 2 + %vecins6055 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6056 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp520 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x 
float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6061 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp521 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp522 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3295,9 +3295,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6072 = insertelement <4 x float> undef, float %add6071, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6072, <4 x float>* undef, align 16 + store <4 x float> %vecins6072, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp523 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3305,7 +3305,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6074 = fadd 
<4 x float> %tmp524, %tmp523 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6074, <4 x float>* undef, align 16 + store <4 x float> %add6074, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp525 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3317,23 +3317,23 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6077 = insertelement <4 x float> %tmp526, float %add6076, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6077, <4 x float>* undef, align 16 + store <4 x float> %vecins6077, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp527 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6079 = fadd float %val, 0xC07E9B3340000000 + %add6079 = fadd float undef, 0xC07E9B3340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp528 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp529 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6082 = fadd float %val, 0x407DCE6660000000 + %add6082 = fadd float undef, 0x407DCE6660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6083 = insertelement <4 x float> undef, float %add6082, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6083, <4 x float>* undef, align 16 + store <4 x float> %vecins6083, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp530 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3343,9 +3343,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6086 = insertelement <4 x float> undef, float %add6085, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6086, <4 x float>* undef, align 16 + store <4 x float> %vecins6086, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp531 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3353,19 +3353,19 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6088 = fadd <4 x float> %tmp532, %tmp531 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6088, <4 x float>* undef, align 16 + store <4 x float> %add6088, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp533 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6089 = extractelement <4 x float> %tmp533, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6107 = fadd float %val, 0xC06A166660000000 + %add6107 = fadd float undef, 0xC06A166660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp534 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6108 = insertelement <4 x float> %tmp534, float %add6107, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6108, <4 x float>* undef, align 16 + store <4 x float> %vecins6108, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp535 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3375,7 +3375,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp536 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> 
undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp537 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3395,7 +3395,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6119 = insertelement <4 x float> %tmp540, float %add6118, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6119, <4 x float>* undef, align 16 + store <4 x float> %vecins6119, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp541 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3407,7 +3407,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6122 = insertelement <4 x float> %tmp542, float %add6121, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6122, <4 x float>* undef, align 16 + store <4 x float> %vecins6122, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6123 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3415,17 +3415,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp543 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6126 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp544 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6128 = insertelement <4 x float> %tmp544, float %val, i32 3 + %vecins6128 = insertelement <4 x float> %tmp544, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6128, <4 x float>* undef, align 16 + store <4 x float> %vecins6128, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp545 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3441,7 +3441,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6133 = insertelement <4 x float> undef, float %add6132, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6133, <4 x float>* undef, align 16 + store <4 x float> %vecins6133, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6134 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3463,9 +3463,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp551 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6178 = insertelement <4 x float> %tmp551, float %val, i32 1 + %vecins6178 = insertelement <4 x float> %tmp551, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6178, <4 x float>* undef, align 16 + store <4 x float> %vecins6178, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp552 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3487,13 +3487,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6184 = insertelement <4 x float> %tmp555, float %add6183, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6184, <4 x float>* undef, align 16 + store <4 x float> %vecins6184, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp556 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6189 = insertelement <4 x float> undef, float %val, i32 0 + %vecins6189 = 
insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6189, <4 x float>* undef, align 16 + store <4 x float> %vecins6189, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp557 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3505,7 +3505,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6192 = insertelement <4 x float> %tmp558, float %add6191, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6192, <4 x float>* undef, align 16 + store <4 x float> %vecins6192, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp559 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3519,7 +3519,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6198 = insertelement <4 x float> %tmp561, float %add6197, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp562 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3527,7 +3527,7 @@ tail call 
void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6200 = fadd <4 x float> %tmp563, %tmp562 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6200, <4 x float>* undef, align 16 + store <4 x float> %add6200, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp564 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3535,7 +3535,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp565 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6203 = insertelement <4 x float> %tmp565, float %val, i32 0 + %vecins6203 = insertelement <4 x float> %tmp565, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp566 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3549,9 +3549,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp568 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6209 = insertelement <4 x float> %tmp568, float %val, i32 2 + %vecins6209 = insertelement <4 x float> %tmp568, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6209, <4 x float>* undef, align 16 + 
store <4 x float> %vecins6209, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp569 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3559,7 +3559,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp570 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6219 = fadd float %val, 0xC0596CCCC0000000 + %add6219 = fadd float undef, 0xC0596CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp571 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3573,7 +3573,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6228 = fadd <4 x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6228, <4 x float>* undef, align 16 + store <4 x float> %add6228, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6229 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3583,7 +3583,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6231 = insertelement <4 x float> %tmp573, float %add6230, i32 0 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6231, <4 x float>* undef, align 16 + store <4 x float> %vecins6231, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp574 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3595,7 +3595,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6234 = insertelement <4 x float> %tmp575, float %add6233, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6234, <4 x float>* undef, align 16 + store <4 x float> %vecins6234, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6235 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3603,13 +3603,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6237 = insertelement <4 x float> undef, float %add6236, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6237, <4 x float>* undef, align 16 + store <4 x float> %vecins6237, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp576 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6245 = insertelement <4 x float> undef, float %val, i32 0 + 
%vecins6245 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6245, <4 x float>* undef, align 16 + store <4 x float> %vecins6245, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp577 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3619,17 +3619,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp578 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6251 = insertelement <4 x float> undef, float %val, i32 2 + %vecins6251 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp579 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6253 = fadd float %val, 0xC0692999A0000000 + %add6253 = fadd float undef, 0xC0692999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6254 = insertelement <4 x float> undef, float %add6253, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6254, <4 x float>* undef, align 16 + store <4 x float> %vecins6254, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> 
, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp580 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3637,7 +3637,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6256 = fadd <4 x float> %tmp581, %tmp580 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6256, <4 x float>* undef, align 16 + store <4 x float> %add6256, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp582 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3649,7 +3649,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6259 = insertelement <4 x float> %tmp583, float %add6258, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6259, <4 x float>* undef, align 16 + store <4 x float> %vecins6259, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp584 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3661,7 +3661,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6262 = insertelement <4 x float> %tmp585, float %add6261, i32 1 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6262, <4 x float>* undef, align 16 + store <4 x float> %vecins6262, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp586 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3669,9 +3669,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp587 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6265 = insertelement <4 x float> %tmp587, float %val, i32 2 + %vecins6265 = insertelement <4 x float> %tmp587, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6265, <4 x float>* undef, align 16 + store <4 x float> %vecins6265, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp588 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3683,9 +3683,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6268 = insertelement <4 x float> %tmp589, float %add6267, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6268, <4 x float>* undef, align 16 + store <4 x float> %vecins6268, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() 
- store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp590 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3693,7 +3693,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6270 = fadd <4 x float> %tmp591, %tmp590 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6270, <4 x float>* undef, align 16 + store <4 x float> %add6270, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp592 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3705,7 +3705,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6273 = insertelement <4 x float> %tmp593, float %add6272, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6273, <4 x float>* undef, align 16 + store <4 x float> %vecins6273, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp594 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3717,7 +3717,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6276 = insertelement <4 x float> %tmp595, float %add6275, i32 1 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6276, <4 x float>* undef, align 16 + store <4 x float> %vecins6276, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp596 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3729,7 +3729,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6279 = insertelement <4 x float> %tmp597, float %add6278, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6279, <4 x float>* undef, align 16 + store <4 x float> %vecins6279, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp598 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3739,21 +3739,21 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6282 = insertelement <4 x float> undef, float %add6281, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6282, <4 x float>* undef, align 16 + store <4 x float> %vecins6282, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6284 = fadd <4 
x float> undef, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6285 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6289 = fadd float %val, 0xC0738999A0000000 + %add6289 = fadd float undef, 0xC0738999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp599 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6293 = insertelement <4 x float> %tmp599, float %val, i32 2 + %vecins6293 = insertelement <4 x float> %tmp599, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6293, <4 x float>* undef, align 16 + store <4 x float> %vecins6293, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp600 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3763,15 +3763,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6296 = insertelement <4 x float> undef, float %add6295, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6296, <4 x float>* undef, align 16 + store <4 x float> %vecins6296, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp601 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6298 = fadd <4 x float> undef, %tmp601 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6298, <4 x float>* undef, align 16 + store <4 x float> %add6298, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp602 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3783,7 +3783,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6301 = insertelement <4 x float> %tmp603, float %add6300, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6301, <4 x float>* undef, align 16 + store <4 x float> %vecins6301, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp604 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3795,7 +3795,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6304 = insertelement <4 x float> %tmp605, float %add6303, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6304, <4 x float>* undef, align 16 + store <4 x float> %vecins6304, <4 x float>* undef, align 16 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp606 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3805,7 +3805,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6307 = insertelement <4 x float> undef, float %add6306, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6307, <4 x float>* undef, align 16 + store <4 x float> %vecins6307, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp607 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3817,9 +3817,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6310 = insertelement <4 x float> %tmp608, float %add6309, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6310, <4 x float>* undef, align 16 + store <4 x float> %vecins6310, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp609 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3827,7 +3827,7 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6312 = fadd <4 x float> %tmp610, %tmp609 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6312, <4 x float>* undef, align 16 + store <4 x float> %add6312, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp611 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3849,13 +3849,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6657 = insertelement <4 x float> %tmp614, float %add6656, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6657, <4 x float>* undef, align 16 + store <4 x float> %vecins6657, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6660 = insertelement <4 x float> undef, float %val, i32 3 + %vecins6660 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6660, <4 x float>* undef, align 16 + store <4 x float> %vecins6660, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp615 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3867,7 +3867,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6665 = insertelement <4 x float> %tmp616, float %add6664, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp617 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3875,15 +3875,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp618 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp619 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6676 = fadd <4 x float> %tmp619, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6676, <4 x float>* undef, align 16 + store <4 x float> %add6676, <4 x float>* undef, align 16 tail call void asm sideeffect 
"", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp620 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3901,7 +3901,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp622 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp623 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3913,7 +3913,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6685 = insertelement <4 x float> %tmp624, float %add6684, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6685, <4 x float>* undef, align 16 + store <4 x float> %vecins6685, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp625 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3925,15 +3925,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6688 = insertelement <4 x float> %tmp626, float %add6687, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile 
<4 x float> %vecins6688, <4 x float>* undef, align 16 + store <4 x float> %vecins6688, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp627 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6690 = fadd <4 x float> undef, %tmp627 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6690, <4 x float>* undef, align 16 + store <4 x float> %add6690, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp628 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3945,7 +3945,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6693 = insertelement <4 x float> %tmp629, float %add6692, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6693, <4 x float>* undef, align 16 + store <4 x float> %vecins6693, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp630 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3957,7 +3957,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() 
%vecins6696 = insertelement <4 x float> %tmp631, float %add6695, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6696, <4 x float>* undef, align 16 + store <4 x float> %vecins6696, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp632 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3969,7 +3969,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6699 = insertelement <4 x float> %tmp633, float %add6698, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6699, <4 x float>* undef, align 16 + store <4 x float> %vecins6699, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp634 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3981,17 +3981,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6702 = insertelement <4 x float> %tmp635, float %add6701, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6702, <4 x float>* undef, align 16 + store <4 x float> %vecins6702, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp636 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp637 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6707 = insertelement <4 x float> undef, float %val, i32 0 + %vecins6707 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6707, <4 x float>* undef, align 16 + store <4 x float> %vecins6707, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp638 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -3999,7 +3999,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp639 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp640 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4031,21 +4031,21 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp645 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6726 = fadd float %val, 0x4059B999A0000000 + %add6726 = fadd float undef, 0x4059B999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp646 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6727 = insertelement <4 x float> %tmp646, float %add6726, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6727, <4 x float>* undef, align 16 + store <4 x float> %vecins6727, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6728 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6729 = fadd float %vecext6728, 0xC073466660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp647 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4053,7 +4053,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6732 = fadd <4 x float> %tmp648, %tmp647 tail call void asm sideeffect 
"", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6732, <4 x float>* undef, align 16 + store <4 x float> %add6732, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp649 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4065,7 +4065,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6735 = insertelement <4 x float> %tmp650, float %add6734, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6735, <4 x float>* undef, align 16 + store <4 x float> %vecins6735, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp651 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4077,7 +4077,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6738 = insertelement <4 x float> %tmp652, float %add6737, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6738, <4 x float>* undef, align 16 + store <4 x float> %vecins6738, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp653 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4089,7 +4089,7 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6741 = insertelement <4 x float> %tmp654, float %add6740, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6741, <4 x float>* undef, align 16 + store <4 x float> %vecins6741, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp655 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4101,7 +4101,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6744 = insertelement <4 x float> %tmp656, float %add6743, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6744, <4 x float>* undef, align 16 + store <4 x float> %vecins6744, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp657 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4109,21 +4109,21 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6746 = fadd <4 x float> %tmp658, %tmp657 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6746, <4 x float>* undef, align 16 + store <4 x float> %add6746, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp659 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6749 = insertelement <4 x float> undef, float %val, i32 0 + %vecins6749 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6749, <4 x float>* undef, align 16 + store <4 x float> %vecins6749, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp660 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6751 = fadd float %val, 0x4075DE6660000000 + %add6751 = fadd float undef, 0x4075DE6660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6752 = insertelement <4 x float> undef, float %add6751, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6752, <4 x float>* undef, align 16 + store <4 x float> %vecins6752, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp661 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4133,7 +4133,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6755 = insertelement <4 x float> undef, float %add6754, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6755, <4 x float>* undef, align 16 + store <4 x float> %vecins6755, <4 x float>* undef, align 16 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp662 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4145,15 +4145,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6758 = insertelement <4 x float> %tmp663, float %add6757, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6758, <4 x float>* undef, align 16 + store <4 x float> %vecins6758, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp664 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6760 = fadd <4 x float> undef, %tmp664 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6760, <4 x float>* undef, align 16 + store <4 x float> %add6760, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp665 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4165,9 +4165,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp666 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp667 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4183,7 +4183,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6777 = insertelement <4 x float> %tmp669, float %add6776, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6777, <4 x float>* undef, align 16 + store <4 x float> %vecins6777, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp670 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4195,9 +4195,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6784 = extractelement <4 x float> %tmp671, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6875 = insertelement <4 x float> undef, float %val, i32 0 + %vecins6875 = insertelement <4 x float> undef, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6875, <4 x float>* undef, align 
16 + store <4 x float> %vecins6875, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp672 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4207,15 +4207,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6878 = insertelement <4 x float> undef, float %add6877, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6878, <4 x float>* undef, align 16 + store <4 x float> %vecins6878, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6888 = fadd float %val, 0x4057CCCCC0000000 + %add6888 = fadd float undef, 0x4057CCCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp673 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6889 = insertelement <4 x float> %tmp673, float %add6888, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6889, <4 x float>* undef, align 16 + store <4 x float> %vecins6889, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp674 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4227,7 +4227,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() 
%vecins6892 = insertelement <4 x float> %tmp675, float %add6891, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6892, <4 x float>* undef, align 16 + store <4 x float> %vecins6892, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp676 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4239,7 +4239,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6895 = insertelement <4 x float> %tmp677, float %add6894, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6895, <4 x float>* undef, align 16 + store <4 x float> %vecins6895, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp678 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4249,7 +4249,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6900 = fadd <4 x float> %tmp680, %tmp679 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6900, <4 x float>* undef, align 16 + store <4 x float> %add6900, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp681 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4261,9 +4261,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6903 = insertelement <4 x float> %tmp682, float %add6902, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6903, <4 x float>* undef, align 16 + store <4 x float> %vecins6903, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6905 = fadd float %val, 0x4031B33340000000 + %add6905 = fadd float undef, 0x4031B33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp683 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4271,9 +4271,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp684 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6912 = insertelement <4 x float> %tmp684, float %val, i32 3 + %vecins6912 = insertelement <4 x float> %tmp684, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp685 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4281,13 +4281,13 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6914 = fadd <4 x float> %tmp686, %tmp685 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6914, <4 x float>* undef, align 16 + store <4 x float> %add6914, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6915 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6920 = insertelement <4 x float> undef, float %val, i32 1 + %vecins6920 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6920, <4 x float>* undef, align 16 + store <4 x float> %vecins6920, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext6921 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4295,11 +4295,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp687 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6926 = insertelement <4 x float> %tmp687, float %val, i32 3 + %vecins6926 = insertelement <4 x float> %tmp687, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6926, <4 x float>* undef, align 16 + store <4 x float> %vecins6926, <4 x float>* undef, align 16 tail call void asm sideeffect 
"", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp688 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4307,13 +4307,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6928 = fadd <4 x float> %tmp689, %tmp688 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6928, <4 x float>* undef, align 16 + store <4 x float> %add6928, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6930 = fadd float %val, -4.590000e+02 + %add6930 = fadd float undef, -4.590000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6931 = insertelement <4 x float> undef, float %add6930, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6931, <4 x float>* undef, align 16 + store <4 x float> %vecins6931, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp690 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4323,7 +4323,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp691 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp692 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4349,15 +4349,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp695 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6950 = fadd float %val, 0xC078F33340000000 + %add6950 = fadd float undef, 0xC078F33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp696 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6951 = insertelement <4 x float> %tmp696, float %add6950, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6951, <4 x float>* undef, align 16 + store <4 x float> %vecins6951, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp697 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4369,7 +4369,7 @@ tail 
call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6954 = insertelement <4 x float> %tmp698, float %add6953, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6954, <4 x float>* undef, align 16 + store <4 x float> %vecins6954, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp699 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4377,7 +4377,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6956 = fadd <4 x float> %tmp700, %tmp699 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6956, <4 x float>* undef, align 16 + store <4 x float> %add6956, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp701 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4389,7 +4389,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6959 = insertelement <4 x float> %tmp702, float %add6958, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6959, <4 x float>* undef, align 16 + store <4 x float> %vecins6959, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp703 = load <4 x float>, <4 x float>* undef, align 16 tail call 
void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4401,15 +4401,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6965 = insertelement <4 x float> %tmp704, float %add6964, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6965, <4 x float>* undef, align 16 + store <4 x float> %vecins6965, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add6975 = fadd float %val, 0x406AF33340000000 + %add6975 = fadd float undef, 0x406AF33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp705 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6976 = insertelement <4 x float> %tmp705, float %add6975, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6976, <4 x float>* undef, align 16 + store <4 x float> %vecins6976, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp706 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4417,7 +4417,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6984 = fadd <4 x float> %tmp707, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6984, <4 x float>* undef, align 16 + store <4 x float> 
%add6984, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp708 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4429,7 +4429,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins6987 = insertelement <4 x float> %tmp709, float %add6986, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6987, <4 x float>* undef, align 16 + store <4 x float> %vecins6987, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp710 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4439,11 +4439,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp711 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins6996 = insertelement <4 x float> %tmp711, float %val, i32 3 + %vecins6996 = insertelement <4 x float> %tmp711, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins6996, <4 x float>* undef, align 16 + store <4 x float> %vecins6996, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp712 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4451,7 +4451,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add6998 = fadd <4 x float> %tmp713, %tmp712 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add6998, <4 x float>* undef, align 16 + store <4 x float> %add6998, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp714 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4463,7 +4463,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7001 = insertelement <4 x float> %tmp715, float %add7000, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7001, <4 x float>* undef, align 16 + store <4 x float> %vecins7001, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp716 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4475,11 +4475,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7004 = insertelement <4 x float> %tmp717, float %add7003, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, 
<4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp718 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7140 = fadd float %val, 0x403D333340000000 + %add7140 = fadd float undef, 0x403D333340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7141 = insertelement <4 x float> undef, float %add7140, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4489,7 +4489,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7144 = insertelement <4 x float> undef, float %add7143, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp719 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4501,15 +4501,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7150 = insertelement <4 x float> %tmp720, float %add7149, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7150, <4 x float>* undef, align 16 + store <4 x float> %vecins7150, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp721 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7152 = fadd <4 x float> %tmp721, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7152, <4 x float>* undef, align 16 + store <4 x float> %add7152, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7156 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4519,7 +4519,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7158 = insertelement <4 x float> %tmp722, float %add7157, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7158, <4 x float>* undef, align 16 + store <4 x float> %vecins7158, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp723 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4531,13 +4531,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7161 = insertelement <4 x float> %tmp724, float %add7160, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7161, <4 x float>* undef, align 16 + store <4 x float> %vecins7161, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7168 = fadd float %val, 0xC072F199A0000000 + %add7168 = fadd float undef, 0xC072F199A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp725 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7170 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4545,11 +4545,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7172 = insertelement <4 x float> undef, float %add7171, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7172, <4 x float>* undef, align 16 + store <4 x float> %vecins7172, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7173 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp726 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4559,7 +4559,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7421 = insertelement <4 x float> undef, float %add7420, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7421, <4 x float>* undef, align 16 + store <4 x float> %vecins7421, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp727 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4571,7 +4571,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7424 = insertelement <4 x float> %tmp728, float %add7423, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7424, <4 x float>* undef, align 16 + store <4 x float> %vecins7424, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp729 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4583,11 +4583,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7427 = insertelement <4 x float> %tmp730, float %add7426, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7427, <4 x float>* undef, align 16 + store <4 x float> %vecins7427, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7428 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp731 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4599,9 +4599,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7570 = insertelement <4 x float> %tmp732, float %add7569, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7570, <4 x float>* undef, align 16 + store <4 x float> %vecins7570, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp733 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4609,7 +4609,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7572 = fadd <4 x float> %tmp734, %tmp733 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7572, <4 x float>* undef, align 16 + store <4 x float> %add7572, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7573 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4619,11 +4619,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7575 = insertelement <4 x float> %tmp735, float %add7574, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7575, <4 x float>* undef, align 16 + store <4 x float> %vecins7575, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp736 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7577 = fadd float %val, 0xC051666660000000 + %add7577 = fadd float undef, 0xC051666660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp737 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4635,7 +4635,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7581 = insertelement <4 x float> undef, float %add7580, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7581, <4 x float>* undef, align 16 + store <4 x float> 
%vecins7581, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp739 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4647,7 +4647,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7584 = insertelement <4 x float> %tmp740, float %add7583, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp741 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4655,7 +4655,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7586 = fadd <4 x float> %tmp742, %tmp741 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7586, <4 x float>* undef, align 16 + store <4 x float> %add7586, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp743 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4665,7 +4665,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp744 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 
x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp745 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4677,15 +4677,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7592 = insertelement <4 x float> %tmp746, float %add7591, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7592, <4 x float>* undef, align 16 + store <4 x float> %vecins7592, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp747 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext7593 = extractelement <4 x float> %tmp747, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins7595 = insertelement <4 x float> undef, float %val, i32 2 + %vecins7595 = insertelement <4 x float> undef, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7595, <4 x float>* undef, align 16 + store <4 x float> %vecins7595, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp748 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4693,17 +4693,17 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7597 = fadd float %vecext7596, 0x407E666660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp749 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7616 = fadd float %val, 0xC04DE66660000000 + %add7616 = fadd float undef, 0xC04DE66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp750 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7617 = insertelement <4 x float> %tmp750, float %add7616, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7617, <4 x float>* undef, align 16 + store <4 x float> %vecins7617, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp751 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4715,17 +4715,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7620 = insertelement <4 x float> %tmp752, float %add7619, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7620, <4 x float>* undef, align 16 + store <4 x float> %vecins7620, <4 x 
float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp753 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7622 = fadd float %val, 0xC054B999A0000000 + %add7622 = fadd float undef, 0xC054B999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp754 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins7626 = insertelement <4 x float> undef, float %val, i32 3 + %vecins7626 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7626, <4 x float>* undef, align 16 + store <4 x float> %vecins7626, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp755 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4733,7 +4733,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7628 = fadd <4 x float> %tmp756, %tmp755 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7628, <4 x float>* undef, align 16 + store <4 x float> %add7628, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp757 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4745,13 +4745,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7631 = insertelement <4 x float> %tmp758, float %add7630, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7639 = fadd float %val, 0x407C5999A0000000 + %add7639 = fadd float undef, 0x407C5999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp759 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7640 = insertelement <4 x float> %tmp759, float %add7639, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp760 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4759,9 +4759,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp761 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7644 = fadd float %val, 0xC0758999A0000000 + %add7644 = fadd float undef, 0xC0758999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp762 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4773,7 +4773,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7648 = insertelement <4 x float> %tmp763, float %add7647, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7648, <4 x float>* undef, align 16 + store <4 x float> %vecins7648, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp764 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4785,7 +4785,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7651 = insertelement <4 x float> %tmp765, float %add7650, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7651, <4 x float>* undef, align 16 + store <4 x float> %vecins7651, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp766 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4797,7 +4797,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7654 = insertelement <4 x float> %tmp767, float %add7653, i32 3 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7654, <4 x float>* undef, align 16 + store <4 x float> %vecins7654, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp768 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4805,7 +4805,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7656 = fadd <4 x float> %tmp769, %tmp768 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7656, <4 x float>* undef, align 16 + store <4 x float> %add7656, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp770 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4817,7 +4817,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7659 = insertelement <4 x float> %tmp771, float %add7658, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7659, <4 x float>* undef, align 16 + store <4 x float> %vecins7659, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp772 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4829,7 +4829,7 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7662 = insertelement <4 x float> %tmp773, float %add7661, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7662, <4 x float>* undef, align 16 + store <4 x float> %vecins7662, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp774 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4841,7 +4841,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7665 = insertelement <4 x float> %tmp775, float %add7664, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7665, <4 x float>* undef, align 16 + store <4 x float> %vecins7665, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp776 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4851,7 +4851,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7668 = insertelement <4 x float> undef, float %add7667, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7668, <4 x float>* undef, align 16 + store <4 x float> %vecins7668, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp777 = load <4 x float>, <4 x float>* undef tail 
call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4873,23 +4873,23 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp781 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp782 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add7731 = fadd float %val, 1.900000e+02 + %add7731 = fadd float undef, 1.900000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp783 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins7732 = insertelement <4 x float> %tmp783, float %add7731, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7732, <4 x float>* undef, align 16 + store <4 x float> %vecins7732, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp784 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins7735 = insertelement <4 x float> %tmp784, float %val, i32 2 + %vecins7735 = insertelement <4 x float> %tmp784, float undef, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7735, <4 x float>* undef, align 16 + store <4 x float> %vecins7735, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp785 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4897,11 +4897,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7737 = fadd float %vecext7736, 0xC06AF66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins7850 = insertelement <4 x float> undef, float %val, i32 3 + %vecins7850 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins7850, <4 x float>* undef, align 16 + store <4 x float> %vecins7850, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp786 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4909,7 +4909,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add7852 = fadd <4 x float> %tmp787, %tmp786 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add7852, <4 x float>* undef, align 16 + store <4 x 
float> %add7852, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp788 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4921,13 +4921,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9398 = insertelement <4 x float> %tmp789, float %add9397, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9398, <4 x float>* undef, align 16 + store <4 x float> %vecins9398, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9399 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp790 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9401 = insertelement <4 x float> %tmp790, float %val, i32 2 + %vecins9401 = insertelement <4 x float> %tmp790, float undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp791 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4939,11 +4939,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9404 = insertelement <4 x float> %tmp792, float %add9403, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9404, <4 x float>* 
undef, align 16 + store <4 x float> %vecins9404, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp793 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp794 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4959,7 +4959,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp796 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp797 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4971,7 +4971,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9415 = insertelement <4 x float> %tmp798, float %add9414, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9415, <4 x float>* undef, align 16 + store <4 x float> %vecins9415, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp799 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4983,9 +4983,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9418 = insertelement <4 x float> %tmp800, float %add9417, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9418, <4 x float>* undef, align 16 + store <4 x float> %vecins9418, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp801 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -4993,7 +4993,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9420 = fadd <4 x float> %tmp802, %tmp801 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9420, <4 x float>* undef, align 16 + store <4 x float> %add9420, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp803 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5001,9 +5001,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp804 = load <4 x 
float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9423 = insertelement <4 x float> %tmp804, float %val, i32 0 + %vecins9423 = insertelement <4 x float> %tmp804, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9423, <4 x float>* undef, align 16 + store <4 x float> %vecins9423, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp805 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5015,17 +5015,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9426 = insertelement <4 x float> %tmp806, float %add9425, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9426, <4 x float>* undef, align 16 + store <4 x float> %vecins9426, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp807 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9428 = fadd float %val, 0xC065466660000000 + %add9428 = fadd float undef, 0xC065466660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp808 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9429 = insertelement <4 x float> %tmp808, float %add9428, i32 2 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9429, <4 x float>* undef, align 16 + store <4 x float> %vecins9429, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp809 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5037,7 +5037,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9432 = insertelement <4 x float> %tmp810, float %add9431, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp811 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5045,7 +5045,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9434 = fadd <4 x float> %tmp812, %tmp811 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9436 = fadd float %val, -3.185000e+02 + %add9436 = fadd float undef, -3.185000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp813 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5053,7 +5053,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp814 = load <4 x float>, <4 x float>* 
undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp815 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5065,7 +5065,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9443 = insertelement <4 x float> %tmp816, float %add9442, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9443, <4 x float>* undef, align 16 + store <4 x float> %vecins9443, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp817 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5077,7 +5077,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9446 = insertelement <4 x float> %tmp818, float %add9445, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9446, <4 x float>* undef, align 16 + store <4 x float> %vecins9446, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp819 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5085,23 +5085,23 @@ tail call void 
asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9448 = fadd <4 x float> %tmp820, %tmp819 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9448, <4 x float>* undef, align 16 + store <4 x float> %add9448, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9450 = fadd float %val, 0xC0718199A0000000 + %add9450 = fadd float undef, 0xC0718199A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp821 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9451 = insertelement <4 x float> %tmp821, float %add9450, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9451, <4 x float>* undef, align 16 + store <4 x float> %vecins9451, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp822 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp823 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9454 = insertelement <4 x float> %tmp823, float %val, i32 1 + %vecins9454 = insertelement <4 x float> %tmp823, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9454, <4 x float>* undef, align 16 + store <4 x float> %vecins9454, <4 x float>* undef, align 
16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp824 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5113,23 +5113,23 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9457 = insertelement <4 x float> %tmp825, float %add9456, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9457, <4 x float>* undef, align 16 + store <4 x float> %vecins9457, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9458 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp826 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9460 = insertelement <4 x float> %tmp826, float %val, i32 3 + %vecins9460 = insertelement <4 x float> %tmp826, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9460, <4 x float>* undef, align 16 + store <4 x float> %vecins9460, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp827 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9462 = fadd <4 x float> %tmp827, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9462, <4 x float>* undef, align 16 + store <4 x float> %add9462, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp828 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5137,23 +5137,23 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp829 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9465 = insertelement <4 x float> %tmp829, float %val, i32 0 + %vecins9465 = insertelement <4 x float> %tmp829, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9467 = fadd float %val, 0x405D666660000000 + %add9467 = fadd float undef, 0x405D666660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp830 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9468 = insertelement <4 x float> %tmp830, float %add9467, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9468, <4 x float>* undef, align 16 + store <4 x float> %vecins9468, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp831 = load 
<4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9470 = fadd float %val, 0x4077033340000000 + %add9470 = fadd float undef, 0x4077033340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp832 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9472 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5163,9 +5163,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9474 = insertelement <4 x float> %tmp833, float %add9473, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9474, <4 x float>* undef, align 16 + store <4 x float> %vecins9474, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp834 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5173,7 +5173,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9476 = fadd <4 x float> 
%tmp835, %tmp834 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9476, <4 x float>* undef, align 16 + store <4 x float> %add9476, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp836 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5185,17 +5185,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9479 = insertelement <4 x float> %tmp837, float %add9478, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9479, <4 x float>* undef, align 16 + store <4 x float> %vecins9479, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp838 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9481 = fadd float %val, 0x407BE33340000000 + %add9481 = fadd float undef, 0x407BE33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp839 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9482 = insertelement <4 x float> %tmp839, float %add9481, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9482, <4 x float>* undef, align 16 + store <4 x float> %vecins9482, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9483 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5205,7 +5205,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9485 = insertelement <4 x float> %tmp840, float %add9484, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9485, <4 x float>* undef, align 16 + store <4 x float> %vecins9485, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp841 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5215,13 +5215,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp842 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp843 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp844 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5229,15 +5229,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9492 = fadd float %vecext9491, 0x407C166660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9495 = fadd float %val, 0x407DBB3340000000 + %add9495 = fadd float undef, 0x407DBB3340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp845 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9496 = insertelement <4 x float> %tmp845, float %add9495, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9496, <4 x float>* undef, align 16 + store <4 x float> %vecins9496, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp846 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5249,41 +5249,41 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9499 = insertelement <4 x float> %tmp847, float %add9498, i32 2 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9499, <4 x float>* undef, align 16 + store <4 x float> %vecins9499, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp848 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9501 = fadd float %val, 0x407D5CCCC0000000 + %add9501 = fadd float undef, 0x407D5CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp849 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9502 = insertelement <4 x float> %tmp849, float %add9501, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9502, <4 x float>* undef, align 16 + store <4 x float> %vecins9502, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp850 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9504 = fadd <4 x float> %tmp850, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9504, <4 x float>* undef, align 16 + store <4 x float> %add9504, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp851 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9506 = fadd float %val, 0x4076EE6660000000 + %add9506 = fadd float undef, 0x4076EE6660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp852 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9507 = insertelement <4 x float> %tmp852, float %add9506, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9507, <4 x float>* undef, align 16 + store <4 x float> %vecins9507, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp853 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9509 = fadd float %val, 0xC0535999A0000000 + %add9509 = fadd float undef, 0xC0535999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp854 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp855 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5295,7 +5295,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9513 = insertelement <4 x float> %tmp856, 
float %add9512, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9513, <4 x float>* undef, align 16 + store <4 x float> %vecins9513, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp857 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5303,11 +5303,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp858 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9516 = insertelement <4 x float> %tmp858, float %val, i32 3 + %vecins9516 = insertelement <4 x float> %tmp858, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9516, <4 x float>* undef, align 16 + store <4 x float> %vecins9516, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp859 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5319,9 +5319,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp862 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - 
%vecins9521 = insertelement <4 x float> %tmp862, float %val, i32 0 + %vecins9521 = insertelement <4 x float> %tmp862, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9521, <4 x float>* undef, align 16 + store <4 x float> %vecins9521, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp863 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5333,25 +5333,25 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9524 = insertelement <4 x float> %tmp864, float %add9523, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9524, <4 x float>* undef, align 16 + store <4 x float> %vecins9524, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp865 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9526 = fadd float %val, 0x4072833340000000 + %add9526 = fadd float undef, 0x4072833340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp866 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9527 = insertelement <4 x float> %tmp866, float %add9526, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9527, <4 x float>* 
undef, align 16 + store <4 x float> %vecins9527, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp867 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9530 = insertelement <4 x float> undef, float %val, i32 3 + %vecins9530 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9530, <4 x float>* undef, align 16 + store <4 x float> %vecins9530, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp868 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5363,9 +5363,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp870 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9535 = insertelement <4 x float> %tmp870, float %val, i32 0 + %vecins9535 = insertelement <4 x float> %tmp870, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9535, <4 x float>* undef, align 16 + store <4 x float> %vecins9535, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp871 
= load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5377,7 +5377,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9538 = insertelement <4 x float> %tmp872, float %add9537, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9538, <4 x float>* undef, align 16 + store <4 x float> %vecins9538, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp873 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5385,17 +5385,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9543 = fadd float %vecext9542, 0x4050D999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9576 = fadd float %val, 0x40219999A0000000 + %add9576 = fadd float undef, 0x40219999A0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9577 = insertelement <4 x float> undef, float %add9576, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9577, <4 x float>* undef, align 16 + store <4 x float> %vecins9577, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp874 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9580 = 
insertelement <4 x float> undef, float %val, i32 1 + %vecins9580 = insertelement <4 x float> undef, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9580, <4 x float>* undef, align 16 + store <4 x float> %vecins9580, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp875 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5407,11 +5407,11 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9583 = insertelement <4 x float> %tmp876, float %add9582, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9583, <4 x float>* undef, align 16 + store <4 x float> %vecins9583, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp877 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9673 = extractelement <4 x float> undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5421,7 +5421,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9675 = insertelement <4 x float> %tmp878, float %add9674, i32 0 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9675, <4 x float>* undef, align 16 + store <4 x float> %vecins9675, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9676 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5441,7 +5441,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9681 = insertelement <4 x float> %tmp881, float %add9680, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9681, <4 x float>* undef, align 16 + store <4 x float> %vecins9681, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp882 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5451,7 +5451,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9686 = fadd <4 x float> %tmp883, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9686, <4 x float>* undef, align 16 + store <4 x float> %add9686, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp884 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5481,19 +5481,19 @@ tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9695 = insertelement <4 x float> %tmp888, float %add9694, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9695, <4 x float>* undef, align 16 + store <4 x float> %vecins9695, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp889 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9697 = fadd float %val, 0x4058D33340000000 + %add9697 = fadd float undef, 0x4058D33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp890 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9698 = insertelement <4 x float> %tmp890, float %add9697, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9698, <4 x float>* undef, align 16 + store <4 x float> %vecins9698, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp891 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5509,7 +5509,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9703 = insertelement <4 x 
float> %tmp893, float %add9702, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9703, <4 x float>* undef, align 16 + store <4 x float> %vecins9703, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp894 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5521,7 +5521,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9706 = insertelement <4 x float> %tmp895, float %add9705, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9706, <4 x float>* undef, align 16 + store <4 x float> %vecins9706, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9707 = extractelement <4 x float> undef, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5531,23 +5531,23 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9709 = insertelement <4 x float> %tmp896, float %add9708, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9709, <4 x float>* undef, align 16 + store <4 x float> %vecins9709, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp897 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9710 = extractelement <4 x float> %tmp897, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9712 = insertelement <4 x float> undef, float %val, i32 3 + %vecins9712 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9712, <4 x float>* undef, align 16 + store <4 x float> %vecins9712, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp898 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9714 = fadd <4 x float> undef, %tmp898 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9714, <4 x float>* undef, align 16 + store <4 x float> %add9714, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp899 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5555,9 +5555,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp900 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9717 = insertelement <4 x float> %tmp900, 
float %val, i32 0 + %vecins9717 = insertelement <4 x float> %tmp900, float undef, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9717, <4 x float>* undef, align 16 + store <4 x float> %vecins9717, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp901 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5569,7 +5569,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9720 = insertelement <4 x float> %tmp902, float %add9719, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9720, <4 x float>* undef, align 16 + store <4 x float> %vecins9720, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp903 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5581,7 +5581,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9723 = insertelement <4 x float> %tmp904, float %add9722, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9723, <4 x float>* undef, align 16 + store <4 x float> %vecins9723, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp905 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5593,15 +5593,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9726 = insertelement <4 x float> %tmp906, float %add9725, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9726, <4 x float>* undef, align 16 + store <4 x float> %vecins9726, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp907 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9728 = fadd <4 x float> %tmp907, undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9728, <4 x float>* undef, align 16 + store <4 x float> %add9728, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp908 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5613,17 +5613,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9731 = insertelement <4 x float> %tmp909, float %add9730, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9731, <4 x float>* undef, align 16 + store <4 x float> %vecins9731, <4 x float>* undef, 
align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp910 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9733 = fadd float %val, 0xC050F33340000000 + %add9733 = fadd float undef, 0xC050F33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp911 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9734 = insertelement <4 x float> %tmp911, float %add9733, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9734, <4 x float>* undef, align 16 + store <4 x float> %vecins9734, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp912 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5635,23 +5635,23 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9737 = insertelement <4 x float> %tmp913, float %add9736, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9737, <4 x float>* undef, align 16 + store <4 x float> %vecins9737, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp914 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9738 = 
extractelement <4 x float> %tmp914, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9740 = insertelement <4 x float> undef, float %val, i32 3 + %vecins9740 = insertelement <4 x float> undef, float undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9740, <4 x float>* undef, align 16 + store <4 x float> %vecins9740, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp915 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp916 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp917 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5661,7 +5661,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9745 = insertelement <4 x float> undef, float %add9744, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9745, <4 x float>* undef, align 16 + store <4 x float> %vecins9745, <4 x float>* undef, 
align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp918 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5673,7 +5673,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9748 = insertelement <4 x float> %tmp919, float %add9747, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9748, <4 x float>* undef, align 16 + store <4 x float> %vecins9748, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp920 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5685,7 +5685,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9751 = insertelement <4 x float> %tmp921, float %add9750, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9751, <4 x float>* undef, align 16 + store <4 x float> %vecins9751, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp922 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5697,9 +5697,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9754 = insertelement <4 x float> %tmp923, float %add9753, i32 3 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9754, <4 x float>* undef, align 16 + store <4 x float> %vecins9754, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* %.compoundliteral9755 + store <4 x float> , <4 x float>* %.compoundliteral9755 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp924 = load <4 x float>, <4 x float>* %.compoundliteral9755 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5717,7 +5717,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9759 = insertelement <4 x float> %tmp927, float %add9758, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9759, <4 x float>* undef, align 16 + store <4 x float> %vecins9759, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp928 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5729,17 +5729,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9762 = insertelement <4 x float> %tmp929, float %add9761, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9762, <4 x float>* undef, align 16 + store <4 x float> %vecins9762, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp930 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add9764 = fadd float %val, 0xC060E66660000000 + %add9764 = fadd float undef, 0xC060E66660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp931 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9765 = insertelement <4 x float> %tmp931, float %add9764, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9765, <4 x float>* undef, align 16 + store <4 x float> %vecins9765, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp932 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5751,9 +5751,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9768 = insertelement <4 x float> %tmp933, float %add9767, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9768, <4 x float>* undef, align 16 + store <4 x float> %vecins9768, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* %.compoundliteral9769 + store <4 x float> , <4 x float>* %.compoundliteral9769 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() 
%tmp934 = load <4 x float>, <4 x float>* %.compoundliteral9769 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5761,7 +5761,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add9770 = fadd <4 x float> %tmp935, %tmp934 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add9770, <4 x float>* undef, align 16 + store <4 x float> %add9770, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp936 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5773,7 +5773,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9773 = insertelement <4 x float> %tmp937, float %add9772, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9773, <4 x float>* undef, align 16 + store <4 x float> %vecins9773, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp938 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5785,25 +5785,25 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins9776 = insertelement <4 x float> %tmp939, float %add9775, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins9776, <4 x float>* undef, align 16 + store <4 x float> 
%vecins9776, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext9816 = extractelement <4 x float> undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp940 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %vecins9818 = insertelement <4 x float> %tmp940, float %val, i32 1 + %vecins9818 = insertelement <4 x float> %tmp940, float undef, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp941 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10388 = fadd float %val, 4.755000e+02 + %add10388 = fadd float undef, 4.755000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp942 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10389 = insertelement <4 x float> %tmp942, float %add10388, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10389, <4 x float>* undef, align 16 + store <4 x float> %vecins10389, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp943 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5815,19 +5815,19 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10392 = insertelement <4 x float> %tmp944, float %add10391, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10392, <4 x float>* undef, align 16 + store <4 x float> %vecins10392, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp945 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp946 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10405 = fadd float %val, -5.650000e+01 + %add10405 = fadd float undef, -5.650000e+01 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp947 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10406 = insertelement <4 x float> %tmp947, float %add10405, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10406, <4 x float>* undef, align 16 + store <4 x float> %vecins10406, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp948 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5839,7 +5839,7 @@ tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10409 = insertelement <4 x float> %tmp949, float %add10408, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10409, <4 x float>* undef, align 16 + store <4 x float> %vecins10409, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp950 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5849,9 +5849,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp951 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* %.compoundliteral10413 + store <4 x float> , <4 x float>* %.compoundliteral10413 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp952 = load <4 x float>, <4 x float>* %.compoundliteral10413 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5859,7 +5859,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add10414 = fadd <4 x float> %tmp953, %tmp952 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add10414, <4 x float>* undef, align 16 + store <4 x 
float> %add10414, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp954 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5871,7 +5871,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10417 = insertelement <4 x float> %tmp955, float %add10416, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10417, <4 x float>* undef, align 16 + store <4 x float> %vecins10417, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp956 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5883,15 +5883,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10420 = insertelement <4 x float> %tmp957, float %add10419, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10420, <4 x float>* undef, align 16 + store <4 x float> %vecins10420, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10422 = fadd float %val, 0xC0662CCCC0000000 + %add10422 = fadd float undef, 0xC0662CCCC0000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext10424 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store 
volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp958 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5899,7 +5899,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add10428 = fadd <4 x float> %tmp959, %tmp958 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add10428, <4 x float>* undef, align 16 + store <4 x float> %add10428, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp960 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5909,13 +5909,13 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp961 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10436 = fadd float %val, 0xC06AF33340000000 + %add10436 = fadd float undef, 0xC06AF33340000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp962 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10437 = insertelement <4 x float> 
%tmp962, float %add10436, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10437, <4 x float>* undef, align 16 + store <4 x float> %vecins10437, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecext10438 = extractelement <4 x float> undef, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5925,9 +5925,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10440 = insertelement <4 x float> %tmp963, float %add10439, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10440, <4 x float>* undef, align 16 + store <4 x float> %vecins10440, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp964 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5941,7 +5941,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10445 = insertelement <4 x float> %tmp966, float %add10444, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10445, <4 x float>* undef, align 16 + store <4 x float> %vecins10445, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp967 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5953,7 +5953,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10448 = insertelement <4 x float> %tmp968, float %add10447, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10448, <4 x float>* undef, align 16 + store <4 x float> %vecins10448, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp969 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5965,7 +5965,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10451 = insertelement <4 x float> %tmp970, float %add10450, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10451, <4 x float>* undef, align 16 + store <4 x float> %vecins10451, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp971 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5975,7 +5975,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10454 = insertelement <4 x float> undef, float %add10453, i32 3 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp972 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5983,7 +5983,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %add10456 = fadd <4 x float> %tmp973, %tmp972 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %add10456, <4 x float>* undef, align 16 + store <4 x float> %add10456, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp974 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -5993,7 +5993,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10459 = insertelement <4 x float> undef, float %add10458, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10459, <4 x float>* undef, align 16 + store <4 x float> %vecins10459, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp975 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6015,7 +6015,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10465 = 
insertelement <4 x float> %tmp978, float %add10464, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10465, <4 x float>* undef, align 16 + store <4 x float> %vecins10465, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp979 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6027,9 +6027,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10468 = insertelement <4 x float> %tmp980, float %add10467, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10468, <4 x float>* undef, align 16 + store <4 x float> %vecins10468, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp981 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6045,7 +6045,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10473 = insertelement <4 x float> %tmp983, float %add10472, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10473, <4 x float>* undef, align 16 + store <4 x float> %vecins10473, <4 x float>* undef, align 16 tail call void asm sideeffect "", 
"~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp984 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6057,15 +6057,15 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10476 = insertelement <4 x float> %tmp985, float %add10475, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10476, <4 x float>* undef, align 16 + store <4 x float> %vecins10476, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10489 = fadd float %val, 0x4074666660000000 + %add10489 = fadd float undef, 0x4074666660000000 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp986 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10490 = insertelement <4 x float> %tmp986, float %add10489, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10490, <4 x float>* undef, align 16 + store <4 x float> %vecins10490, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp987 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6079,9 +6079,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10510 = insertelement <4 x float> %tmp989, float %add10509, i32 3 tail call void 
asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10510, <4 x float>* undef, align 16 + store <4 x float> %vecins10510, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp990 = load <4 x float>, <4 x float>* undef tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6097,17 +6097,17 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10515 = insertelement <4 x float> %tmp992, float %add10514, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10515, <4 x float>* undef, align 16 + store <4 x float> %vecins10515, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp993 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - %add10562 = fadd float %val, 2.035000e+02 + %add10562 = fadd float undef, 2.035000e+02 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp994 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10563 = insertelement <4 x float> %tmp994, float %add10562, i32 2 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x 
float> %vecins10563, <4 x float>* undef, align 16 + store <4 x float> %vecins10563, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp995 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6119,9 +6119,9 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10566 = insertelement <4 x float> %tmp996, float %add10565, i32 3 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10566, <4 x float>* undef, align 16 + store <4 x float> %vecins10566, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> , <4 x float>* %.compoundliteral10567 + store <4 x float> , <4 x float>* %.compoundliteral10567 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp997 = load <4 x float>, <4 x float>* %.compoundliteral10567 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6139,7 +6139,7 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10571 = insertelement <4 x float> %tmp1000, float %add10570, i32 0 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10571, <4 x float>* undef, align 16 + store <4 x float> %vecins10571, <4 x float>* undef, align 16 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %tmp1001 = load <4 x float>, <4 x float>* undef, align 16 tail call void asm 
sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() @@ -6151,56 +6151,56 @@ tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() %vecins10574 = insertelement <4 x float> %tmp1002, float %add10573, i32 1 tail call void asm sideeffect "", "~{q0}{q1}{q2}{q3}{q4}{q5}{q6}{q7}{q8}{q9}{q10}{q11}{q12}{q13}{q14}{q15}"() - store volatile <4 x float> %vecins10574, <4 x float>* undef, align 16 + store <4 x float> %vecins10574, <4 x float>* undef, align 16 %tmp1003 = load <4 x float>, <4 x float>* undef, align 16 %vecext10575 = extractelement <4 x float> %tmp1003, i32 2 %tmp1004 = load <4 x float>, <4 x float>* undef, align 16 - %vecins10577 = insertelement <4 x float> %tmp1004, float %val, i32 2 - store volatile <4 x float> %vecins10577, <4 x float>* undef, align 16 + %vecins10577 = insertelement <4 x float> %tmp1004, float undef, i32 2 + store <4 x float> %vecins10577, <4 x float>* undef, align 16 %tmp1005 = load <4 x float>, <4 x float>* undef, align 16 %vecext10578 = extractelement <4 x float> %tmp1005, i32 3 %add10579 = fadd float %vecext10578, 0x4076566660000000 %tmp1006 = load <4 x float>, <4 x float>* undef, align 16 %vecins10580 = insertelement <4 x float> %tmp1006, float %add10579, i32 3 - store volatile <4 x float> %vecins10580, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10581 + store <4 x float> %vecins10580, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral10581 %tmp1007 = load <4 x float>, <4 x float>* %.compoundliteral10581 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1008 = load <4 x float>, <4 x float>* undef, align 16 %vecext10583 = extractelement <4 x float> %tmp1008, i32 0 %add10584 = fadd float %vecext10583, 0xC060533340000000 %tmp1009 = load <4 x float>, <4 x float>* undef, align 16 %vecins10585 = 
insertelement <4 x float> %tmp1009, float %add10584, i32 0 - store volatile <4 x float> %vecins10585, <4 x float>* undef, align 16 + store <4 x float> %vecins10585, <4 x float>* undef, align 16 %tmp1010 = load <4 x float>, <4 x float>* undef, align 16 %vecext10586 = extractelement <4 x float> %tmp1010, i32 1 %add10587 = fadd float %vecext10586, 0xC0694CCCC0000000 %tmp1011 = load <4 x float>, <4 x float>* undef, align 16 %vecins10588 = insertelement <4 x float> %tmp1011, float %add10587, i32 1 - store volatile <4 x float> %vecins10588, <4 x float>* undef, align 16 + store <4 x float> %vecins10588, <4 x float>* undef, align 16 %tmp1012 = load <4 x float>, <4 x float>* undef, align 16 %vecext10589 = extractelement <4 x float> %tmp1012, i32 2 %add10590 = fadd float %vecext10589, 0xC0541999A0000000 %tmp1013 = load <4 x float>, <4 x float>* undef, align 16 %vecins10591 = insertelement <4 x float> %tmp1013, float %add10590, i32 2 - store volatile <4 x float> %vecins10591, <4 x float>* undef, align 16 + store <4 x float> %vecins10591, <4 x float>* undef, align 16 %tmp1014 = load <4 x float>, <4 x float>* undef, align 16 %vecext10592 = extractelement <4 x float> %tmp1014, i32 3 %add10593 = fadd float %vecext10592, 0xC06C566660000000 %tmp1015 = load <4 x float>, <4 x float>* undef, align 16 %vecins10594 = insertelement <4 x float> %tmp1015, float %add10593, i32 3 - store volatile <4 x float> %vecins10594, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10595 + store <4 x float> %vecins10594, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral10595 %tmp1016 = load <4 x float>, <4 x float>* %.compoundliteral10595 %tmp1017 = load <4 x float>, <4 x float>* undef, align 16 %add10596 = fadd <4 x float> %tmp1017, %tmp1016 - store volatile <4 x float> %add10596, <4 x float>* undef, align 16 + store <4 x float> %add10596, <4 x float>* undef, align 16 %tmp1018 = load <4 x float>, <4 x float>* undef, align 16 
%vecext10597 = extractelement <4 x float> %tmp1018, i32 0 %add10598 = fadd float %vecext10597, 0x40640999A0000000 %tmp1019 = load <4 x float>, <4 x float>* undef, align 16 %vecins10599 = insertelement <4 x float> %tmp1019, float %add10598, i32 0 - store volatile <4 x float> %vecins10599, <4 x float>* undef, align 16 + store <4 x float> %vecins10599, <4 x float>* undef, align 16 %tmp1020 = load <4 x float>, <4 x float>* undef, align 16 %vecext10600 = extractelement <4 x float> %tmp1020, i32 1 %add10601 = fadd float %vecext10600, 0xC073966660000000 @@ -6211,48 +6211,48 @@ %add10604 = fadd float %vecext10603, 1.780000e+02 %tmp1023 = load <4 x float>, <4 x float>* undef, align 16 %vecins10605 = insertelement <4 x float> %tmp1023, float %add10604, i32 2 - store volatile <4 x float> %vecins10605, <4 x float>* undef, align 16 + store <4 x float> %vecins10605, <4 x float>* undef, align 16 %tmp1024 = load <4 x float>, <4 x float>* undef, align 16 - %add10607 = fadd float %val, 0x4070A33340000000 + %add10607 = fadd float undef, 0x4070A33340000000 %tmp1025 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10609 + store <4 x float> , <4 x float>* %.compoundliteral10609 %tmp1026 = load <4 x float>, <4 x float>* %.compoundliteral10609 %tmp1027 = load <4 x float>, <4 x float>* undef, align 16 %tmp1028 = load <4 x float>, <4 x float>* undef, align 16 %vecext10611 = extractelement <4 x float> %tmp1028, i32 0 %add10612 = fadd float %vecext10611, 0x40757199A0000000 %vecins10613 = insertelement <4 x float> undef, float %add10612, i32 0 - store volatile <4 x float> %vecins10613, <4 x float>* undef, align 16 + store <4 x float> %vecins10613, <4 x float>* undef, align 16 %tmp1029 = load <4 x float>, <4 x float>* undef, align 16 %vecext10614 = extractelement <4 x float> %tmp1029, i32 1 %add10615 = fadd float %vecext10614, 0x40740CCCC0000000 %tmp1030 = load <4 x float>, <4 x float>* undef, align 16 %vecins10616 = insertelement <4 
x float> %tmp1030, float %add10615, i32 1 - store volatile <4 x float> %vecins10616, <4 x float>* undef, align 16 + store <4 x float> %vecins10616, <4 x float>* undef, align 16 %tmp1031 = load <4 x float>, <4 x float>* undef, align 16 %vecext10617 = extractelement <4 x float> %tmp1031, i32 2 %add10618 = fadd float %vecext10617, 0xC012CCCCC0000000 %tmp1032 = load <4 x float>, <4 x float>* undef, align 16 %vecins10619 = insertelement <4 x float> %tmp1032, float %add10618, i32 2 - store volatile <4 x float> %vecins10619, <4 x float>* undef, align 16 + store <4 x float> %vecins10619, <4 x float>* undef, align 16 %tmp1033 = load <4 x float>, <4 x float>* undef, align 16 %vecext10620 = extractelement <4 x float> %tmp1033, i32 3 %add10621 = fadd float %vecext10620, 0x406E566660000000 %tmp1034 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10623 + store <4 x float> , <4 x float>* %.compoundliteral10623 %tmp1035 = load <4 x float>, <4 x float>* %.compoundliteral10623 %add10624 = fadd <4 x float> undef, %tmp1035 %tmp1036 = load <4 x float>, <4 x float>* undef, align 16 %vecext10625 = extractelement <4 x float> %tmp1036, i32 0 %tmp1037 = load <4 x float>, <4 x float>* undef, align 16 - %vecins10627 = insertelement <4 x float> %tmp1037, float %val, i32 0 - store volatile <4 x float> %vecins10627, <4 x float>* undef, align 16 + %vecins10627 = insertelement <4 x float> %tmp1037, float undef, i32 0 + store <4 x float> %vecins10627, <4 x float>* undef, align 16 %tmp1038 = load <4 x float>, <4 x float>* undef, align 16 %vecext10628 = extractelement <4 x float> %tmp1038, i32 1 %add10629 = fadd float %vecext10628, 0x407E3CCCC0000000 %tmp1039 = load <4 x float>, <4 x float>* undef, align 16 %vecins10630 = insertelement <4 x float> %tmp1039, float %add10629, i32 1 - store volatile <4 x float> %vecins10630, <4 x float>* undef, align 16 + store <4 x float> %vecins10630, <4 x float>* undef, align 16 %tmp1040 = load <4 x 
float>, <4 x float>* undef, align 16 %vecext10631 = extractelement <4 x float> %tmp1040, i32 2 %tmp1041 = load <4 x float>, <4 x float>* undef, align 16 @@ -6261,8 +6261,8 @@ %add10635 = fadd float %vecext10634, 0xC067533340000000 %tmp1043 = load <4 x float>, <4 x float>* undef, align 16 %vecins10636 = insertelement <4 x float> %tmp1043, float %add10635, i32 3 - store volatile <4 x float> %vecins10636, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10637 + store <4 x float> %vecins10636, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral10637 %tmp1044 = load <4 x float>, <4 x float>* undef, align 16 %add10638 = fadd <4 x float> %tmp1044, undef %tmp1045 = load <4 x float>, <4 x float>* undef, align 16 @@ -6270,94 +6270,94 @@ %add10640 = fadd float %vecext10639, 0x406CA33340000000 %tmp1046 = load <4 x float>, <4 x float>* undef, align 16 %vecins10641 = insertelement <4 x float> %tmp1046, float %add10640, i32 0 - store volatile <4 x float> %vecins10641, <4 x float>* undef, align 16 + store <4 x float> %vecins10641, <4 x float>* undef, align 16 %tmp1047 = load <4 x float>, <4 x float>* undef, align 16 %vecext10642 = extractelement <4 x float> %tmp1047, i32 1 %add10643 = fadd float %vecext10642, 0xC07C8999A0000000 %tmp1048 = load <4 x float>, <4 x float>* undef, align 16 %vecins10644 = insertelement <4 x float> %tmp1048, float %add10643, i32 1 - store volatile <4 x float> %vecins10644, <4 x float>* undef, align 16 + store <4 x float> %vecins10644, <4 x float>* undef, align 16 %tmp1049 = load <4 x float>, <4 x float>* undef, align 16 %vecext10645 = extractelement <4 x float> %tmp1049, i32 2 %tmp1050 = load <4 x float>, <4 x float>* undef, align 16 %tmp1051 = load <4 x float>, <4 x float>* undef, align 16 - %vecins10748 = insertelement <4 x float> undef, float %val, i32 3 + %vecins10748 = insertelement <4 x float> undef, float undef, i32 3 %tmp1052 = load <4 x float>, <4 x float>* 
%.compoundliteral10749 %add10750 = fadd <4 x float> undef, %tmp1052 - store volatile <4 x float> %add10750, <4 x float>* undef, align 16 + store <4 x float> %add10750, <4 x float>* undef, align 16 %tmp1053 = load <4 x float>, <4 x float>* undef, align 16 %vecext10751 = extractelement <4 x float> %tmp1053, i32 0 %add10752 = fadd float %vecext10751, 0x4071B33340000000 %tmp1054 = load <4 x float>, <4 x float>* undef, align 16 %vecins10753 = insertelement <4 x float> %tmp1054, float %add10752, i32 0 - store volatile <4 x float> %vecins10753, <4 x float>* undef, align 16 + store <4 x float> %vecins10753, <4 x float>* undef, align 16 %tmp1055 = load <4 x float>, <4 x float>* undef, align 16 %vecext10754 = extractelement <4 x float> %tmp1055, i32 1 %add10755 = fadd float %vecext10754, 0xC076A66660000000 %tmp1056 = load <4 x float>, <4 x float>* undef, align 16 %vecins10756 = insertelement <4 x float> %tmp1056, float %add10755, i32 1 - store volatile <4 x float> %vecins10756, <4 x float>* undef, align 16 + store <4 x float> %vecins10756, <4 x float>* undef, align 16 %tmp1057 = load <4 x float>, <4 x float>* undef, align 16 %vecext10757 = extractelement <4 x float> %tmp1057, i32 2 %add10758 = fadd float %vecext10757, 3.800000e+01 %tmp1058 = load <4 x float>, <4 x float>* undef, align 16 %vecins10759 = insertelement <4 x float> %tmp1058, float %add10758, i32 2 - store volatile <4 x float> %vecins10759, <4 x float>* undef, align 16 + store <4 x float> %vecins10759, <4 x float>* undef, align 16 %tmp1059 = load <4 x float>, <4 x float>* undef, align 16 %vecext10760 = extractelement <4 x float> %tmp1059, i32 3 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10763 + store <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral10763 %tmp1060 = load <4 x float>, <4 x float>* %.compoundliteral10763 %tmp1061 = load <4 x float>, <4 x float>* undef, align 16 
%tmp1062 = load <4 x float>, <4 x float>* undef, align 16 - %add10985 = fadd float %val, 0x405E933340000000 + %add10985 = fadd float undef, 0x405E933340000000 %tmp1063 = load <4 x float>, <4 x float>* undef, align 16 %vecins10986 = insertelement <4 x float> %tmp1063, float %add10985, i32 3 - store volatile <4 x float> %vecins10986, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral10987 + store <4 x float> %vecins10986, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral10987 %tmp1064 = load <4 x float>, <4 x float>* %.compoundliteral10987 %tmp1065 = load <4 x float>, <4 x float>* undef, align 16 - %vecins10994 = insertelement <4 x float> %tmp1065, float %val, i32 1 + %vecins10994 = insertelement <4 x float> %tmp1065, float undef, i32 1 %tmp1066 = load <4 x float>, <4 x float>* undef, align 16 %vecext10995 = extractelement <4 x float> %tmp1066, i32 2 %add10996 = fadd float %vecext10995, 0x406F9999A0000000 %tmp1067 = load <4 x float>, <4 x float>* undef, align 16 %vecins10997 = insertelement <4 x float> %tmp1067, float %add10996, i32 2 - store volatile <4 x float> %vecins10997, <4 x float>* undef, align 16 + store <4 x float> %vecins10997, <4 x float>* undef, align 16 %tmp1068 = load <4 x float>, <4 x float>* undef, align 16 %vecext10998 = extractelement <4 x float> %tmp1068, i32 3 %add10999 = fadd float %vecext10998, -2.765000e+02 %tmp1069 = load <4 x float>, <4 x float>* undef, align 16 %vecins11000 = insertelement <4 x float> %tmp1069, float %add10999, i32 3 - store volatile <4 x float> %vecins11000, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral11001 + store <4 x float> %vecins11000, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral11001 %tmp1070 = load <4 x float>, <4 x float>* undef, align 16 %add11002 = fadd <4 x float> %tmp1070, undef %vecext11003 = extractelement <4 x float> undef, i32 0 %vecext11009 = 
extractelement <4 x float> undef, i32 2 %tmp1071 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11033 = insertelement <4 x float> %tmp1071, float %val, i32 0 - store volatile <4 x float> %vecins11033, <4 x float>* undef, align 16 + %vecins11033 = insertelement <4 x float> %tmp1071, float undef, i32 0 + store <4 x float> %vecins11033, <4 x float>* undef, align 16 %tmp1072 = load <4 x float>, <4 x float>* undef, align 16 %vecext11034 = extractelement <4 x float> %tmp1072, i32 1 %add11035 = fadd float %vecext11034, 0x4056D33340000000 %tmp1073 = load <4 x float>, <4 x float>* undef, align 16 %vecins11036 = insertelement <4 x float> %tmp1073, float %add11035, i32 1 - store volatile <4 x float> %vecins11036, <4 x float>* undef, align 16 + store <4 x float> %vecins11036, <4 x float>* undef, align 16 %tmp1074 = load <4 x float>, <4 x float>* undef, align 16 %vecext11037 = extractelement <4 x float> %tmp1074, i32 2 %add11038 = fadd float %vecext11037, 0xC06EA33340000000 %tmp1075 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1076 = load <4 x float>, <4 x float>* undef, align 16 %vecext11040 = extractelement <4 x float> %tmp1076, i32 3 %add11041 = fadd float %vecext11040, 0x40746CCCC0000000 %tmp1077 = load <4 x float>, <4 x float>* undef, align 16 %vecins11042 = insertelement <4 x float> %tmp1077, float %add11041, i32 3 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> , <4 x float>* undef %tmp1078 = load <4 x float>, <4 x float>* undef, align 16 %add11044 = fadd <4 x float> %tmp1078, undef - store volatile <4 x float> %add11044, <4 x float>* undef, align 16 + store <4 x float> %add11044, <4 x float>* undef, align 16 %tmp1079 = load <4 x float>, <4 x float>* undef, align 16 %vecext11045 = extractelement <4 x float> %tmp1079, i32 0 %add11046 = fadd float %vecext11045, 0xC076E66660000000 @@ -6366,58 +6366,58 @@ %tmp1081 = 
load <4 x float>, <4 x float>* undef, align 16 %vecext11048 = extractelement <4 x float> %tmp1081, i32 1 %add11049 = fadd float %vecext11048, 4.100000e+02 - %vecins11064 = insertelement <4 x float> undef, float %val, i32 1 - %add11074 = fadd float %val, 0xC06FF999A0000000 + %vecins11064 = insertelement <4 x float> undef, float undef, i32 1 + %add11074 = fadd float undef, 0xC06FF999A0000000 %tmp1082 = load <4 x float>, <4 x float>* undef, align 16 %vecins11075 = insertelement <4 x float> %tmp1082, float %add11074, i32 0 - store volatile <4 x float> %vecins11075, <4 x float>* undef, align 16 - %add11077 = fadd float %val, 0xC075D33340000000 + store <4 x float> %vecins11075, <4 x float>* undef, align 16 + %add11077 = fadd float undef, 0xC075D33340000000 %tmp1083 = load <4 x float>, <4 x float>* undef, align 16 %tmp1084 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1085 = load <4 x float>, <4 x float>* undef, align 16 %vecext11093 = extractelement <4 x float> %tmp1085, i32 2 %add11094 = fadd float %vecext11093, 0xC07CD66660000000 %tmp1086 = load <4 x float>, <4 x float>* undef, align 16 %vecins11095 = insertelement <4 x float> %tmp1086, float %add11094, i32 2 - store volatile <4 x float> %vecins11095, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins11095, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1087 = load <4 x float>, <4 x float>* undef - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1088 = load <4 x float>, <4 x float>* undef, align 16 %vecext11513 = extractelement <4 x float> %tmp1088, i32 2 %add11514 = fadd float %vecext11513, 0xC07C7199A0000000 
%vecins11515 = insertelement <4 x float> undef, float %add11514, i32 2 - store volatile <4 x float> %vecins11515, <4 x float>* undef, align 16 + store <4 x float> %vecins11515, <4 x float>* undef, align 16 %add11520 = fadd <4 x float> undef, undef - store volatile <4 x float> %add11520, <4 x float>* undef, align 16 + store <4 x float> %add11520, <4 x float>* undef, align 16 %vecext11521 = extractelement <4 x float> undef, i32 0 %add11522 = fadd float %vecext11521, 0x4041733340000000 %tmp1089 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1090 = load <4 x float>, <4 x float>* undef %tmp1091 = load <4 x float>, <4 x float>* undef, align 16 %add11562 = fadd <4 x float> %tmp1091, %tmp1090 %tmp1092 = load <4 x float>, <4 x float>* undef, align 16 - %add11564 = fadd float %val, 0xC0411999A0000000 + %add11564 = fadd float undef, 0xC0411999A0000000 %tmp1093 = load <4 x float>, <4 x float>* undef, align 16 %vecins11565 = insertelement <4 x float> %tmp1093, float %add11564, i32 0 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %vecext11586 = extractelement <4 x float> undef, i32 3 %add11587 = fadd float %vecext11586, 3.760000e+02 %tmp1094 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1095 = load <4 x float>, <4 x float>* undef %tmp1096 = load <4 x float>, <4 x float>* undef, align 16 %tmp1097 = load <4 x float>, <4 x float>* undef, align 16 %tmp1098 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11593 = insertelement <4 x float> %tmp1098, float %val, i32 0 + %vecins11593 = insertelement <4 x float> %tmp1098, float undef, i32 0 %vecext11594 = 
extractelement <4 x float> undef, i32 1 %tmp1099 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11596 = insertelement <4 x float> %tmp1099, float %val, i32 1 - store volatile <4 x float> %vecins11596, <4 x float>* undef, align 16 + %vecins11596 = insertelement <4 x float> %tmp1099, float undef, i32 1 + store <4 x float> %vecins11596, <4 x float>* undef, align 16 %tmp1100 = load <4 x float>, <4 x float>* undef, align 16 %vecext11597 = extractelement <4 x float> %tmp1100, i32 2 %add11598 = fadd float %vecext11597, 0x40430CCCC0000000 @@ -6426,34 +6426,34 @@ %tmp1102 = load <4 x float>, <4 x float>* undef, align 16 %vecext11600 = extractelement <4 x float> %tmp1102, i32 3 %tmp1103 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11602 = insertelement <4 x float> %tmp1103, float %val, i32 3 - store volatile <4 x float> %vecins11602, <4 x float>* undef, align 16 + %vecins11602 = insertelement <4 x float> %tmp1103, float undef, i32 3 + store <4 x float> %vecins11602, <4 x float>* undef, align 16 %tmp1104 = load <4 x float>, <4 x float>* undef %tmp1105 = load <4 x float>, <4 x float>* undef, align 16 %add11604 = fadd <4 x float> %tmp1105, %tmp1104 %tmp1106 = load <4 x float>, <4 x float>* undef, align 16 %vecext11605 = extractelement <4 x float> %tmp1106, i32 0 %tmp1107 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11607 = insertelement <4 x float> %tmp1107, float %val, i32 0 - %vecins11621 = insertelement <4 x float> undef, float %val, i32 0 - %vecins11630 = insertelement <4 x float> undef, float %val, i32 3 - store volatile <4 x float> %vecins11630, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral11631 + %vecins11607 = insertelement <4 x float> %tmp1107, float undef, i32 0 + %vecins11621 = insertelement <4 x float> undef, float undef, i32 0 + %vecins11630 = insertelement <4 x float> undef, float undef, i32 3 + store <4 x float> %vecins11630, <4 x float>* undef, align 16 + store <4 x float> , <4 x 
float>* %.compoundliteral11631 %tmp1108 = load <4 x float>, <4 x float>* %.compoundliteral11631 %tmp1109 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %add11634 = fadd float %val, -1.075000e+02 + store <4 x float> undef, <4 x float>* undef, align 16 + %add11634 = fadd float undef, -1.075000e+02 %vecext11647 = extractelement <4 x float> undef, i32 0 %add11648 = fadd float %vecext11647, 0x40775999A0000000 %tmp1110 = load <4 x float>, <4 x float>* undef, align 16 %vecext11650 = extractelement <4 x float> undef, i32 1 %tmp1111 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11784 = insertelement <4 x float> %tmp1111, float %val, i32 3 - store volatile <4 x float> %vecins11784, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral11785 + %vecins11784 = insertelement <4 x float> %tmp1111, float undef, i32 3 + store <4 x float> %vecins11784, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral11785 %tmp1112 = load <4 x float>, <4 x float>* %.compoundliteral11785 %add11786 = fadd <4 x float> undef, %tmp1112 - store volatile <4 x float> %add11786, <4 x float>* undef, align 16 + store <4 x float> %add11786, <4 x float>* undef, align 16 %tmp1113 = load <4 x float>, <4 x float>* undef, align 16 %vecext11787 = extractelement <4 x float> %tmp1113, i32 0 %vecext11807 = extractelement <4 x float> undef, i32 2 @@ -6463,60 +6463,60 @@ %add11811 = fadd float %vecext11810, 0x4068F66660000000 %tmp1115 = load <4 x float>, <4 x float>* undef, align 16 %vecins11812 = insertelement <4 x float> %tmp1115, float %add11811, i32 3 - store volatile <4 x float> %vecins11812, <4 x float>* undef, align 16 + store <4 x float> %vecins11812, <4 x float>* undef, align 16 %tmp1116 = load <4 x float>, <4 x float>* undef %tmp1117 = load <4 x float>, <4 x float>* undef, align 16 %vecext11958 = extractelement <4 x float> undef, i32 1 - store volatile <4 x float> 
undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %vecext11961 = extractelement <4 x float> undef, i32 2 %add11962 = fadd float %vecext11961, -3.680000e+02 %tmp1118 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %add11965 = fadd float %val, 0x4061133340000000 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 + %add11965 = fadd float undef, 0x4061133340000000 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1119 = load <4 x float>, <4 x float>* undef, align 16 %vecext11975 = extractelement <4 x float> %tmp1119, i32 2 %tmp1120 = load <4 x float>, <4 x float>* undef, align 16 - %vecins11977 = insertelement <4 x float> %tmp1120, float %val, i32 2 - store volatile <4 x float> %vecins11977, <4 x float>* undef, align 16 + %vecins11977 = insertelement <4 x float> %tmp1120, float undef, i32 2 + store <4 x float> %vecins11977, <4 x float>* undef, align 16 %vecext11978 = extractelement <4 x float> undef, i32 3 %add11979 = fadd float %vecext11978, 0xC0688999A0000000 %tmp1121 = load <4 x float>, <4 x float>* undef, align 16 %vecins11980 = insertelement <4 x float> %tmp1121, float %add11979, i32 3 - store volatile <4 x float> %vecins11980, <4 x float>* undef, align 16 + store <4 x float> %vecins11980, <4 x float>* undef, align 16 %add11982 = fadd <4 x float> undef, undef - store volatile <4 x float> %add11982, <4 x float>* undef, align 16 + store <4 x float> %add11982, <4 x float>* undef, align 16 %tmp1122 = load <4 x float>, <4 x float>* undef, align 16 %vecext11983 = extractelement <4 x float> %tmp1122, i32 0 %add11984 = fadd float %vecext11983, 0xC075966660000000 %tmp1123 = load <4 x float>, <4 x float>* undef, align 16 - %vecins12005 = insertelement <4 x float> undef, float %val, i32 2 - store volatile <4 x float> %vecins12005, <4 x float>* undef, align 16 + %vecins12005 = 
insertelement <4 x float> undef, float undef, i32 2 + store <4 x float> %vecins12005, <4 x float>* undef, align 16 %tmp1124 = load <4 x float>, <4 x float>* undef, align 16 - %add12007 = fadd float %val, 0xC07124CCC0000000 + %add12007 = fadd float undef, 0xC07124CCC0000000 %vecins12008 = insertelement <4 x float> undef, float %add12007, i32 3 - store volatile <4 x float> %vecins12008, <4 x float>* undef, align 16 + store <4 x float> %vecins12008, <4 x float>* undef, align 16 %tmp1125 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1126 = load <4 x float>, <4 x float>* undef, align 16 - %add12012 = fadd float %val, 0xC0750CCCC0000000 + %add12012 = fadd float undef, 0xC0750CCCC0000000 %tmp1127 = load <4 x float>, <4 x float>* undef, align 16 %vecins12013 = insertelement <4 x float> %tmp1127, float %add12012, i32 0 - store volatile <4 x float> %vecins12013, <4 x float>* undef, align 16 + store <4 x float> %vecins12013, <4 x float>* undef, align 16 %tmp1128 = load <4 x float>, <4 x float>* undef, align 16 - %add12015 = fadd float %val, 0x4079CE6660000000 + %add12015 = fadd float undef, 0x4079CE6660000000 %tmp1129 = load <4 x float>, <4 x float>* undef, align 16 %vecins12016 = insertelement <4 x float> %tmp1129, float %add12015, i32 1 - store volatile <4 x float> %vecins12016, <4 x float>* undef, align 16 - %add12018 = fadd float %val, 3.555000e+02 + store <4 x float> %vecins12016, <4 x float>* undef, align 16 + %add12018 = fadd float undef, 3.555000e+02 %tmp1130 = load <4 x float>, <4 x float>* undef, align 16 %vecins12019 = insertelement <4 x float> %tmp1130, float %add12018, i32 2 %tmp1131 = load <4 x float>, <4 x float>* undef, align 16 %vecext12020 = extractelement <4 x float> %tmp1131, i32 3 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %vecext12028 = extractelement 
<4 x float> undef, i32 1 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1132 = load <4 x float>, <4 x float>* undef, align 16 %add12038 = fadd <4 x float> %tmp1132, undef %tmp1133 = load <4 x float>, <4 x float>* undef, align 16 @@ -6524,27 +6524,27 @@ %add12043 = fadd float %vecext12042, 0x402F9999A0000000 %tmp1134 = load <4 x float>, <4 x float>* undef, align 16 %vecins12044 = insertelement <4 x float> %tmp1134, float %add12043, i32 1 - store volatile <4 x float> %vecins12044, <4 x float>* undef, align 16 + store <4 x float> %vecins12044, <4 x float>* undef, align 16 %vecext12045 = extractelement <4 x float> undef, i32 2 %add12046 = fadd float %vecext12045, 0xC07EF33340000000 %tmp1135 = load <4 x float>, <4 x float>* undef, align 16 %vecins12047 = insertelement <4 x float> %tmp1135, float %add12046, i32 2 - store volatile <4 x float> %vecins12047, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> %vecins12047, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1136 = load <4 x float>, <4 x float>* undef, align 16 %vecext12112 = extractelement <4 x float> %tmp1136, i32 1 %tmp1137 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %add12116 = fadd float %val, 0xC074F4CCC0000000 + store <4 x float> undef, <4 x float>* undef, align 16 + %add12116 = fadd float undef, 0xC074F4CCC0000000 %tmp1138 = load <4 x float>, <4 x float>* undef, align 16 %vecins12117 = insertelement <4 x float> %tmp1138, float %add12116, i32 2 - store volatile <4 x float> %vecins12117, <4 x float>* undef, align 16 + store <4 x float> %vecins12117, <4 x float>* undef, align 16 %tmp1139 = load <4 x float>, <4 x float>* undef, align 16 %vecext12118 = extractelement <4 x 
float> %tmp1139, i32 3 %add12119 = fadd float %vecext12118, 0xC0638CCCC0000000 %tmp1140 = load <4 x float>, <4 x float>* undef, align 16 %vecins12120 = insertelement <4 x float> %tmp1140, float %add12119, i32 3 - %add12152 = fadd float %val, 0x4039333340000000 + %add12152 = fadd float undef, 0x4039333340000000 %tmp1141 = load <4 x float>, <4 x float>* undef, align 16 %vecins12153 = insertelement <4 x float> %tmp1141, float %add12152, i32 0 %vecext12154 = extractelement <4 x float> undef, i32 1 @@ -6561,67 +6561,67 @@ %add12161 = fadd float %vecext12160, 0x407B1999A0000000 %tmp1146 = load <4 x float>, <4 x float>* undef, align 16 %vecins12162 = insertelement <4 x float> %tmp1146, float %add12161, i32 3 - store volatile <4 x float> %vecins12162, <4 x float>* undef, align 16 + store <4 x float> %vecins12162, <4 x float>* undef, align 16 %tmp1147 = load <4 x float>, <4 x float>* undef %tmp1148 = load <4 x float>, <4 x float>* undef, align 16 %tmp1149 = load <4 x float>, <4 x float>* undef, align 16 %vecext12182 = extractelement <4 x float> %tmp1149, i32 1 %tmp1150 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef - %add12208 = fadd float %val, 0x407854CCC0000000 + store <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef + %add12208 = fadd float undef, 0x407854CCC0000000 %tmp1151 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1152 = load <4 x float>, <4 x float>* undef, align 16 %tmp1153 = load <4 x float>, <4 x float>* undef, align 16 - %vecins12218 = insertelement <4 x float> undef, float %val, i32 3 - store volatile <4 x float> %vecins12218, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + %vecins12218 = insertelement <4 x float> undef, float undef, i32 3 + 
store <4 x float> %vecins12218, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1154 = load <4 x float>, <4 x float>* undef %tmp1155 = load <4 x float>, <4 x float>* undef, align 16 %add12220 = fadd <4 x float> %tmp1155, %tmp1154 %tmp1156 = load <4 x float>, <4 x float>* undef, align 16 %tmp1157 = load <4 x float>, <4 x float>* undef, align 16 - %vecins12223 = insertelement <4 x float> %tmp1157, float %val, i32 0 - store volatile <4 x float> %vecins12223, <4 x float>* undef, align 16 + %vecins12223 = insertelement <4 x float> %tmp1157, float undef, i32 0 + store <4 x float> %vecins12223, <4 x float>* undef, align 16 %tmp1158 = load <4 x float>, <4 x float>* undef, align 16 - %add12242 = fadd float %val, 0x4067E33340000000 + %add12242 = fadd float undef, 0x4067E33340000000 %tmp1159 = load <4 x float>, <4 x float>* undef, align 16 %vecins12243 = insertelement <4 x float> %tmp1159, float %add12242, i32 2 - store volatile <4 x float> %vecins12243, <4 x float>* undef, align 16 + store <4 x float> %vecins12243, <4 x float>* undef, align 16 %tmp1160 = load <4 x float>, <4 x float>* undef, align 16 %vecext12244 = extractelement <4 x float> %tmp1160, i32 3 %add12245 = fadd float %vecext12244, 0x4071AE6660000000 %tmp1161 = load <4 x float>, <4 x float>* undef, align 16 %vecins12246 = insertelement <4 x float> %tmp1161, float %add12245, i32 3 - store volatile <4 x float> %vecins12246, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral12247 + store <4 x float> %vecins12246, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral12247 %tmp1162 = load <4 x float>, <4 x float>* %.compoundliteral12247 %tmp1163 = load <4 x float>, <4 x float>* undef, align 16 %add12248 = fadd <4 x float> %tmp1163, %tmp1162 - store volatile <4 x float> %add12248, <4 x float>* undef, align 16 + store <4 x float> %add12248, <4 x float>* undef, align 16 %tmp1164 = load <4 x float>, <4 x float>* undef, align 16 
%vecext12249 = extractelement <4 x float> %tmp1164, i32 0 %add12250 = fadd float %vecext12249, 1.075000e+02 %tmp1165 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1166 = load <4 x float>, <4 x float>* undef, align 16 %vecext12252 = extractelement <4 x float> %tmp1166, i32 1 %add12253 = fadd float %vecext12252, 0xC0662CCCC0000000 %tmp1167 = load <4 x float>, <4 x float>* undef, align 16 %vecins12254 = insertelement <4 x float> %tmp1167, float %add12253, i32 1 - store volatile <4 x float> %vecins12254, <4 x float>* undef, align 16 + store <4 x float> %vecins12254, <4 x float>* undef, align 16 %tmp1168 = load <4 x float>, <4 x float>* undef, align 16 %vecext12255 = extractelement <4 x float> %tmp1168, i32 2 %add12256 = fadd float %vecext12255, 0x40554CCCC0000000 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %add13141 = fadd float %val, 0x40768999A0000000 + store <4 x float> undef, <4 x float>* undef, align 16 + %add13141 = fadd float undef, 0x40768999A0000000 %tmp1169 = load <4 x float>, <4 x float>* undef, align 16 %vecins13142 = insertelement <4 x float> %tmp1169, float %add13141, i32 3 - store volatile <4 x float> %vecins13142, <4 x float>* undef, align 16 + store <4 x float> %vecins13142, <4 x float>* undef, align 16 %tmp1170 = load <4 x float>, <4 x float>* undef %add13144 = fadd <4 x float> undef, %tmp1170 - store volatile <4 x float> %add13144, <4 x float>* undef, align 16 + store <4 x float> %add13144, <4 x float>* undef, align 16 %tmp1171 = load <4 x float>, <4 x float>* undef, align 16 %vecext13145 = extractelement <4 x float> %tmp1171, i32 0 %add13146 = fadd float %vecext13145, 3.975000e+02 @@ -6630,137 +6630,137 @@ %add13379 = fadd float %vecext13378, 0xC053B33340000000 %tmp1173 = load <4 x float>, <4 x float>* undef, align 16 %vecins13380 = insertelement <4 x float> %tmp1173, float %add13379, i32 3 - store 
volatile <4 x float> %vecins13380, <4 x float>* undef, align 16 + store <4 x float> %vecins13380, <4 x float>* undef, align 16 %tmp1174 = load <4 x float>, <4 x float>* undef, align 16 - %vecins13408 = insertelement <4 x float> %tmp1174, float %val, i32 3 - store volatile <4 x float> %vecins13408, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + %vecins13408 = insertelement <4 x float> %tmp1174, float undef, i32 3 + store <4 x float> %vecins13408, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1175 = load <4 x float>, <4 x float>* undef %tmp1176 = load <4 x float>, <4 x float>* undef, align 16 %add13410 = fadd <4 x float> %tmp1176, %tmp1175 - store volatile <4 x float> %add13410, <4 x float>* undef, align 16 + store <4 x float> %add13410, <4 x float>* undef, align 16 %tmp1177 = load <4 x float>, <4 x float>* undef, align 16 - %add13412 = fadd float %val, 0xC0708999A0000000 + %add13412 = fadd float undef, 0xC0708999A0000000 %tmp1178 = load <4 x float>, <4 x float>* undef, align 16 %vecins13413 = insertelement <4 x float> %tmp1178, float %add13412, i32 0 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %vecext13428 = extractelement <4 x float> undef, i32 1 %add13429 = fadd float %vecext13428, 0xC063BCCCC0000000 %tmp1179 = load <4 x float>, <4 x float>* undef, align 16 %vecins13430 = insertelement <4 x float> %tmp1179, float %add13429, i32 1 - store volatile <4 x float> %vecins13430, <4 x float>* undef, align 16 + store <4 x float> %vecins13430, <4 x float>* undef, align 16 %tmp1180 = load <4 x float>, <4 x float>* undef, align 16 %vecext13431 = extractelement <4 x float> %tmp1180, i32 2 - %vecins13433 = insertelement <4 x float> undef, float %val, i32 2 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %add13449 = fadd float %val, 4.590000e+02 + %vecins13433 = insertelement <4 x float> undef, float undef, i32 2 + store <4 
x float> undef, <4 x float>* undef, align 16 + %add13449 = fadd float undef, 4.590000e+02 %tmp1181 = load <4 x float>, <4 x float>* undef, align 16 %vecins13450 = insertelement <4 x float> %tmp1181, float %add13449, i32 3 - store volatile <4 x float> %vecins13450, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins13450, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1182 = load <4 x float>, <4 x float>* undef %tmp1183 = load <4 x float>, <4 x float>* undef, align 16 %add13452 = fadd <4 x float> %tmp1183, %tmp1182 - store volatile <4 x float> %add13452, <4 x float>* undef, align 16 + store <4 x float> %add13452, <4 x float>* undef, align 16 %tmp1184 = load <4 x float>, <4 x float>* undef, align 16 %vecext13453 = extractelement <4 x float> %tmp1184, i32 0 %add13454 = fadd float %vecext13453, 0xC072866660000000 %tmp1185 = load <4 x float>, <4 x float>* undef, align 16 %vecins13455 = insertelement <4 x float> %tmp1185, float %add13454, i32 0 - %add13471 = fadd float %val, 0xC0556CCCC0000000 + %add13471 = fadd float undef, 0xC0556CCCC0000000 %tmp1186 = load <4 x float>, <4 x float>* undef, align 16 %vecins13472 = insertelement <4 x float> %tmp1186, float %add13471, i32 1 - store volatile <4 x float> %vecins13472, <4 x float>* undef, align 16 + store <4 x float> %vecins13472, <4 x float>* undef, align 16 %tmp1187 = load <4 x float>, <4 x float>* undef, align 16 %vecext13473 = extractelement <4 x float> %tmp1187, i32 2 %add13474 = fadd float %vecext13473, 0xC0786999A0000000 %tmp1188 = load <4 x float>, <4 x float>* undef, align 16 %vecins13475 = insertelement <4 x float> %tmp1188, float %add13474, i32 2 - store volatile <4 x float> %vecins13475, <4 x float>* undef, align 16 - %add13477 = fadd float %val, 0xC07C3E6660000000 + store <4 x float> %vecins13475, <4 x float>* undef, align 16 + %add13477 = fadd float undef, 0xC07C3E6660000000 %tmp1189 = load <4 x float>, <4 x float>* undef, align 
16 %vecins13478 = insertelement <4 x float> %tmp1189, float %add13477, i32 3 - store volatile <4 x float> %vecins13478, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins13478, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1190 = load <4 x float>, <4 x float>* undef, align 16 %add13480 = fadd <4 x float> %tmp1190, undef - store volatile <4 x float> %add13480, <4 x float>* undef, align 16 + store <4 x float> %add13480, <4 x float>* undef, align 16 %tmp1191 = load <4 x float>, <4 x float>* undef, align 16 %vecext13481 = extractelement <4 x float> %tmp1191, i32 0 %add13482 = fadd float %vecext13481, 0xC07BA4CCC0000000 %tmp1192 = load <4 x float>, <4 x float>* undef, align 16 %vecins13483 = insertelement <4 x float> %tmp1192, float %add13482, i32 0 - store volatile <4 x float> %vecins13483, <4 x float>* undef, align 16 + store <4 x float> %vecins13483, <4 x float>* undef, align 16 %tmp1193 = load <4 x float>, <4 x float>* undef, align 16 - %add13485 = fadd float %val, 0x406B1999A0000000 + %add13485 = fadd float undef, 0x406B1999A0000000 %tmp1194 = load <4 x float>, <4 x float>* undef, align 16 %vecins13486 = insertelement <4 x float> %tmp1194, float %add13485, i32 1 - store volatile <4 x float> %vecins13486, <4 x float>* undef, align 16 + store <4 x float> %vecins13486, <4 x float>* undef, align 16 %tmp1195 = load <4 x float>, <4 x float>* undef, align 16 %vecext13487 = extractelement <4 x float> %tmp1195, i32 2 %add13488 = fadd float %vecext13487, 0x40647999A0000000 %tmp1196 = load <4 x float>, <4 x float>* undef, align 16 %vecins13489 = insertelement <4 x float> %tmp1196, float %add13488, i32 2 - store volatile <4 x float> %vecins13489, <4 x float>* undef, align 16 + store <4 x float> %vecins13489, <4 x float>* undef, align 16 %tmp1197 = load <4 x float>, <4 x float>* undef, align 16 %vecext13490 = extractelement <4 x float> %tmp1197, i32 3 %tmp1198 = load <4 x float>, <4 x float>* 
undef, align 16 - %vecins13492 = insertelement <4 x float> %tmp1198, float %val, i32 3 - store volatile <4 x float> %vecins13492, <4 x float>* undef, align 16 + %vecins13492 = insertelement <4 x float> %tmp1198, float undef, i32 3 + store <4 x float> %vecins13492, <4 x float>* undef, align 16 %tmp1199 = load <4 x float>, <4 x float>* %.compoundliteral13493 %tmp1200 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 - %vecins13548 = insertelement <4 x float> undef, float %val, i32 3 - store volatile <4 x float> , <4 x float>* %.compoundliteral13549 + store <4 x float> undef, <4 x float>* undef, align 16 + %vecins13548 = insertelement <4 x float> undef, float undef, i32 3 + store <4 x float> , <4 x float>* %.compoundliteral13549 %tmp1201 = load <4 x float>, <4 x float>* undef, align 16 - %add13552 = fadd float %val, 3.230000e+02 + %add13552 = fadd float undef, 3.230000e+02 %tmp1202 = load <4 x float>, <4 x float>* undef, align 16 %vecins13553 = insertelement <4 x float> %tmp1202, float %add13552, i32 0 %tmp1203 = load <4 x float>, <4 x float>* undef, align 16 %vecext13554 = extractelement <4 x float> %tmp1203, i32 1 %tmp1204 = load <4 x float>, <4 x float>* undef, align 16 - %vecins13556 = insertelement <4 x float> %tmp1204, float %val, i32 1 - store volatile <4 x float> %vecins13556, <4 x float>* undef, align 16 + %vecins13556 = insertelement <4 x float> %tmp1204, float undef, i32 1 + store <4 x float> %vecins13556, <4 x float>* undef, align 16 %tmp1205 = load <4 x float>, <4 x float>* undef, align 16 - %add13558 = fadd float %val, 2.625000e+02 + %add13558 = fadd float undef, 2.625000e+02 %tmp1206 = load <4 x float>, <4 x float>* undef, align 16 %vecins13559 = insertelement <4 x float> %tmp1206, float %add13558, i32 2 - store volatile <4 x float> %vecins13559, <4 x float>* undef, align 16 - %add13575 = fadd float %val, -4.725000e+02 + store <4 x float> %vecins13559, <4 x float>* undef, align 16 + 
%add13575 = fadd float undef, -4.725000e+02 %tmp1207 = load <4 x float>, <4 x float>* undef, align 16 %vecins13576 = insertelement <4 x float> %tmp1207, float %add13575, i32 3 - store volatile <4 x float> %vecins13576, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins13576, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1208 = load <4 x float>, <4 x float>* undef %tmp1209 = load <4 x float>, <4 x float>* undef, align 16 %add13578 = fadd <4 x float> %tmp1209, %tmp1208 - store volatile <4 x float> %add13578, <4 x float>* undef, align 16 + store <4 x float> %add13578, <4 x float>* undef, align 16 %tmp1210 = load <4 x float>, <4 x float>* undef, align 16 %tmp1211 = load <4 x float>, <4 x float>* undef, align 16 %add13592 = fadd <4 x float> %tmp1211, undef - store volatile <4 x float> %add13592, <4 x float>* undef, align 16 + store <4 x float> %add13592, <4 x float>* undef, align 16 %tmp1212 = load <4 x float>, <4 x float>* undef, align 16 %vecext13593 = extractelement <4 x float> %tmp1212, i32 0 %add13594 = fadd float %vecext13593, 0xC0708B3340000000 %tmp1213 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1214 = load <4 x float>, <4 x float>* undef, align 16 %vecext13596 = extractelement <4 x float> %tmp1214, i32 1 %add13597 = fadd float %vecext13596, 0x40660999A0000000 - %vecins13604 = insertelement <4 x float> undef, float %val, i32 3 - store volatile <4 x float> %vecins13604, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + %vecins13604 = insertelement <4 x float> undef, float undef, i32 3 + store <4 x float> %vecins13604, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1215 = load <4 x float>, <4 x float>* undef, align 16 %add13606 = fadd <4 x float> %tmp1215, undef %tmp1216 = load <4 x float>, <4 x 
float>* undef, align 16 %vecext13607 = extractelement <4 x float> %tmp1216, i32 0 - %vecins13609 = insertelement <4 x float> undef, float %val, i32 0 + %vecins13609 = insertelement <4 x float> undef, float undef, i32 0 %tmp1217 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1218 = load <4 x float>, <4 x float>* undef, align 16 - %add13622 = fadd float %val, -3.390000e+02 + %add13622 = fadd float undef, -3.390000e+02 %vecins13623 = insertelement <4 x float> undef, float %add13622, i32 0 - store volatile <4 x float> %vecins13623, <4 x float>* undef, align 16 + store <4 x float> %vecins13623, <4 x float>* undef, align 16 %tmp1219 = load <4 x float>, <4 x float>* undef, align 16 %vecext13624 = extractelement <4 x float> %tmp1219, i32 1 %add13625 = fadd float %vecext13624, 0x405C3999A0000000 @@ -6772,41 +6772,41 @@ %add13631 = fadd float %vecext13630, 0xC060333340000000 %tmp1222 = load <4 x float>, <4 x float>* undef, align 16 %vecins13632 = insertelement <4 x float> %tmp1222, float %add13631, i32 3 - store volatile <4 x float> %vecins13632, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins13632, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1223 = load <4 x float>, <4 x float>* undef %tmp1224 = load <4 x float>, <4 x float>* undef, align 16 %add13634 = fadd <4 x float> %tmp1224, %tmp1223 - store volatile <4 x float> %add13634, <4 x float>* undef, align 16 + store <4 x float> %add13634, <4 x float>* undef, align 16 %vecext13635 = extractelement <4 x float> undef, i32 0 %add13636 = fadd float %vecext13635, 0x406A5999A0000000 %tmp1225 = load <4 x float>, <4 x float>* undef, align 16 %vecins13637 = insertelement <4 x float> %tmp1225, float %add13636, i32 0 - store volatile <4 x float> %vecins13637, <4 x float>* undef, align 16 + store <4 x float> %vecins13637, <4 x 
float>* undef, align 16 %tmp1226 = load <4 x float>, <4 x float>* undef, align 16 %tmp1227 = load <4 x float>, <4 x float>* undef, align 16 - %vecins13643 = insertelement <4 x float> %tmp1227, float %val, i32 2 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + %vecins13643 = insertelement <4 x float> %tmp1227, float undef, i32 2 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1228 = load <4 x float>, <4 x float>* undef, align 16 - %add13785 = fadd float %val, 0x4068866660000000 + %add13785 = fadd float undef, 0x4068866660000000 %tmp1229 = load <4 x float>, <4 x float>* undef, align 16 %vecins13786 = insertelement <4 x float> %tmp1229, float %add13785, i32 3 - store volatile <4 x float> %vecins13786, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* %.compoundliteral13787 + store <4 x float> %vecins13786, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* %.compoundliteral13787 %tmp1230 = load <4 x float>, <4 x float>* undef, align 16 %add13788 = fadd <4 x float> %tmp1230, undef %tmp1231 = load <4 x float>, <4 x float>* undef %tmp1232 = load <4 x float>, <4 x float>* undef, align 16 %add13802 = fadd <4 x float> %tmp1232, %tmp1231 - store volatile <4 x float> %add13802, <4 x float>* undef, align 16 + store <4 x float> %add13802, <4 x float>* undef, align 16 %tmp1233 = load <4 x float>, <4 x float>* undef, align 16 %vecext13803 = extractelement <4 x float> %tmp1233, i32 0 %add13804 = fadd float %vecext13803, -2.900000e+01 %tmp1234 = load <4 x float>, <4 x float>* undef, align 16 %vecins13805 = insertelement <4 x float> %tmp1234, float %add13804, i32 0 - store volatile <4 x float> %vecins13805, <4 x float>* undef, align 16 + store <4 x float> %vecins13805, <4 x float>* undef, align 16 %tmp1235 = load <4 x float>, <4 x float>* undef, align 16 - %add13807 = fadd float %val, 6.400000e+01 + %add13807 = fadd float undef, 6.400000e+01 %tmp1236 = load <4 x float>, <4 x float>* undef, align 16 %tmp1237 = load <4 
x float>, <4 x float>* undef, align 16 %vecext13809 = extractelement <4 x float> %tmp1237, i32 2 @@ -6814,28 +6814,28 @@ %vecext13812 = extractelement <4 x float> %tmp1238, i32 3 %add13813 = fadd float %vecext13812, -3.615000e+02 %vecins13814 = insertelement <4 x float> undef, float %add13813, i32 3 - store volatile <4 x float> %vecins13814, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + store <4 x float> %vecins13814, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1239 = load <4 x float>, <4 x float>* undef - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1240 = load <4 x float>, <4 x float>* undef, align 16 %vecext13817 = extractelement <4 x float> %tmp1240, i32 0 - %vecins13856 = insertelement <4 x float> undef, float %val, i32 3 - store volatile <4 x float> %vecins13856, <4 x float>* undef, align 16 - store volatile <4 x float> , <4 x float>* undef + %vecins13856 = insertelement <4 x float> undef, float undef, i32 3 + store <4 x float> %vecins13856, <4 x float>* undef, align 16 + store <4 x float> , <4 x float>* undef %tmp1241 = load <4 x float>, <4 x float>* undef %tmp1242 = load <4 x float>, <4 x float>* undef, align 16 - store volatile <4 x float> undef, <4 x float>* undef, align 16 + store <4 x float> undef, <4 x float>* undef, align 16 %tmp1243 = load <4 x float>, <4 x float>* undef, align 16 %vecext13859 = extractelement <4 x float> %tmp1243, i32 0 %tmp1244 = load <4 x float>, <4 x float>* undef, align 16 - %vecins13861 = insertelement <4 x float> %tmp1244, float %val, i32 0 + %vecins13861 = insertelement <4 x float> %tmp1244, float undef, i32 0 %tmp1245 = load <4 x float>, <4 x float>* undef, align 16 %vecext13862 = extractelement <4 x float> %tmp1245, i32 1 %add13863 = fadd float %vecext13862, -1.380000e+02 %vecins13864 = insertelement <4 x float> undef, float %add13863, i32 1 - %vecins13867 = insertelement <4 x float> 
undef, float %val, i32 2 - store volatile <4 x float> %vecins13867, <4 x float>* undef, align 16 + %vecins13867 = insertelement <4 x float> undef, float undef, i32 2 + store <4 x float> %vecins13867, <4 x float>* undef, align 16 %tmp1246 = load <4 x float>, <4 x float>* undef, align 16 %tmp1247 = load <4 x float>, <4 x float>* undef, align 16 ret <4 x float> undef Index: llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll =================================================================== --- llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll +++ llvm/test/CodeGen/Hexagon/vect/vect-load-v4i16.ll @@ -1,13 +1,13 @@ ; RUN: llc -march=hexagon -O0 -hexagon-align-loads=0 < %s | FileCheck %s ; CHECK-LABEL: danny: -; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r0+#0) -; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r0+#2) -; CHECK: [[T0]] |= asl([[T1]],#16) -; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r0+#4) -; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r0+#6) -; CHECK: [[T2]] |= asl([[T3]],#16) -; CHECK: combine([[T2]],[[T0]]) +; CHECK: r1 = r0 +; CHECK-DAG: [[T0:r[0-9]+]] = memuh(r1+#0) +; CHECK-DAG: [[T1:r[0-9]+]] = memuh(r1+#2) +; CHECK: r2 |= asl([[T1]],#16) +; CHECK-DAG: [[T2:r[0-9]+]] = memuh(r1+#4) +; CHECK-DAG: [[T3:r[0-9]+]] = memuh(r1+#6) +; CHECK: r1 |= asl([[T3]],#16) define <4 x i16> @danny(<4 x i16>* %p) { %t0 = load <4 x i16>, <4 x i16>* %p, align 2 ret <4 x i16> %t0 @@ -15,8 +15,8 @@ ; CHECK-LABEL: sammy: ; CHECK-DAG: [[T0:r[0-9]+]] = memw(r0+#0) -; CHECK-DAG: [[T1:r[0-9]+]] = memw(r0+#4) -; CHECK: combine([[T1]],[[T0]]) +; CHECK-DAG: r1 = memw(r0+#4) +; CHECK: r0 = [[T0]] define <4 x i16> @sammy(<4 x i16>* %p) { %t0 = load <4 x i16>, <4 x i16>* %p, align 4 ret <4 x i16> %t0 Index: llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll =================================================================== --- llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll +++ llvm/test/CodeGen/Mips/Fast-ISel/callabi.ll @@ -244,12 +244,12 @@ ; ALL-DAG: lw $[[REG_C1_ADDR:[0-9]+]], %got(c1)($[[REG_GP]]) ; ALL-DAG: lbu 
$[[REG_C1:[0-9]+]], 0($[[REG_C1_ADDR]]) ; 32R1-DAG: sll $[[REG_C1_1:[0-9]+]], $[[REG_C1]], 24 - ; 32R1-DAG: sra $5, $[[REG_C1_1]], 24 - ; 32R2-DAG: seb $5, $[[REG_C1]] + ; 32R1-DAG: sra $4, $[[REG_C1_1]], 24 + ; 32R2-DAG: seb $4, $[[REG_C1]] ; FIXME: andi is superfulous ; ALL-DAG: lw $[[REG_UC1_ADDR:[0-9]+]], %got(uc1)($[[REG_GP]]) ; ALL-DAG: lbu $[[REG_UC1:[0-9]+]], 0($[[REG_UC1_ADDR]]) - ; ALL-DAG: andi $4, $[[REG_UC1]], 255 + ; ALL-DAG: andi $5, $[[REG_UC1]], 255 ; ALL-DAG: lw $[[REG_S1_ADDR:[0-9]+]], %got(s1)($[[REG_GP]]) ; ALL-DAG: lhu $[[REG_S1:[0-9]+]], 0($[[REG_S1_ADDR]]) ; 32R1-DAG: sll $[[REG_S1_1:[0-9]+]], $[[REG_S1]], 16 Index: llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll =================================================================== --- llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll +++ llvm/test/CodeGen/Mips/Fast-ISel/memtest1.ll @@ -17,15 +17,9 @@ define void @cpy(i8* %src, i32 %i) { ; ALL-LABEL: cpy: - ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) - ; ALL-DAG: sw $4, 24($sp) - ; ALL-DAG: move $4, $[[T0]] - ; ALL-DAG: sw $5, 20($sp) - ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp) - ; ALL-DAG: move $5, $[[T1]] - ; ALL-DAG: lw $6, 20($sp) - ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memcpy)(${{[0-9]+}}) - ; ALL: jalr $[[T2]] + ; ALL: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) + ; ALL: lw $[[T2:[0-9]+]], %got(memcpy)(${{[0-9]+}}) + ; ALL: jalr $[[T2]] ; ALL-NEXT: nop ; ALL-NOT: {{.*}}$2{{.*}} call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds ([50 x i8], [50 x i8]* @dest, i32 0, i32 0), i8* %src, i32 %i, i1 false) @@ -36,14 +30,8 @@ ; ALL-LABEL: mov: - ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) - ; ALL-DAG: sw $4, 24($sp) - ; ALL-DAG: move $4, $[[T0]] - ; ALL-DAG: sw $5, 20($sp) - ; ALL-DAG: lw $[[T1:[0-9]+]], 24($sp) - ; ALL-DAG: move $5, $[[T1]] - ; ALL-DAG: lw $6, 20($sp) - ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memmove)(${{[0-9]+}}) + ; ALL: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) + ; ALL: lw $[[T2:[0-9]+]], 
%got(memmove)(${{[0-9]+}}) ; ALL: jalr $[[T2]] ; ALL-NEXT: nop ; ALL-NOT: {{.*}}$2{{.*}} @@ -54,15 +42,8 @@ define void @clear(i32 %i) { ; ALL-LABEL: clear: - ; ALL-DAG: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) - ; ALL-DAG: sw $4, 16($sp) - ; ALL-DAG: move $4, $[[T0]] - ; ALL-DAG: addiu $[[T1:[0-9]+]], $zero, 42 - ; 32R1-DAG: sll $[[T2:[0-9]+]], $[[T1]], 24 - ; 32R1-DAG: sra $5, $[[T2]], 24 - ; 32R2-DAG: seb $5, $[[T1]] - ; ALL-DAG: lw $6, 16($sp) - ; ALL-DAG: lw $[[T2:[0-9]+]], %got(memset)(${{[0-9]+}}) + ; ALL: lw $[[T0:[0-9]+]], %got(dest)(${{[0-9]+}}) + ; ALL: lw $[[T2:[0-9]+]], %got(memset)(${{[0-9]+}}) ; ALL: jalr $[[T2]] ; ALL-NEXT: nop ; ALL-NOT: {{.*}}$2{{.*}} Index: llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll =================================================================== --- llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll +++ llvm/test/CodeGen/Mips/Fast-ISel/pr40325.ll @@ -5,9 +5,10 @@ ; CHECK-LABEL: test: ; CHECK: # %bb.0: ; CHECK-NEXT: move $1, $4 -; CHECK-NEXT: andi $2, $4, 1 -; CHECK-NEXT: sb $2, 0($5) +; CHECK-NEXT: move $4, $1 ; CHECK-NEXT: andi $1, $1, 1 +; CHECK-NEXT: sb $1, 0($5) +; CHECK-NEXT: andi $1, $4, 1 ; CHECK-NEXT: bgtz $1, $BB0_1 ; CHECK-NEXT: nop ; CHECK-NEXT: # %bb.1: # %foo Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add.ll @@ -86,12 +86,11 @@ define i64 @add_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: add_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addu $1, $6, $4 -; MIPS32-NEXT: sltu $2, $1, $4 -; MIPS32-NEXT: addu $3, $7, $5 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: addu $3, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: addu $2, $6, $4 +; MIPS32-NEXT: sltu $3, $2, $4 +; MIPS32-NEXT: addu $1, $7, $5 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: addu $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -102,34 +101,30 @@ define i128 
@add_i128(i128 %a, i128 %b) { ; MIPS32-LABEL: add_i128: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $sp, $sp, -8 -; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: move $8, $4 +; MIPS32-NEXT: move $3, $5 +; MIPS32-NEXT: move $4, $6 +; MIPS32-NEXT: addiu $1, $sp, 16 +; MIPS32-NEXT: lw $2, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 20 +; MIPS32-NEXT: lw $6, 0($1) ; MIPS32-NEXT: addiu $1, $sp, 24 +; MIPS32-NEXT: lw $5, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 28 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 28 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 32 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: addiu $8, $sp, 36 -; MIPS32-NEXT: lw $8, 0($8) -; MIPS32-NEXT: addu $1, $1, $4 -; MIPS32-NEXT: sltu $4, $1, $4 -; MIPS32-NEXT: addu $5, $2, $5 -; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: addu $2, $2, $8 +; MIPS32-NEXT: sltu $8, $2, $8 +; MIPS32-NEXT: addu $3, $6, $3 +; MIPS32-NEXT: andi $8, $8, 1 +; MIPS32-NEXT: addu $3, $3, $8 +; MIPS32-NEXT: sltu $6, $3, $6 ; MIPS32-NEXT: addu $4, $5, $4 -; MIPS32-NEXT: sltu $2, $4, $2 -; MIPS32-NEXT: addu $5, $3, $6 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: addu $2, $5, $2 -; MIPS32-NEXT: sltu $3, $2, $3 -; MIPS32-NEXT: addu $5, $8, $7 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: addu $5, $5, $3 -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: move $2, $1 -; MIPS32-NEXT: move $3, $4 -; MIPS32-NEXT: lw $4, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: addiu $sp, $sp, 8 +; MIPS32-NEXT: andi $6, $6, 1 +; MIPS32-NEXT: addu $4, $4, $6 +; MIPS32-NEXT: sltu $5, $4, $5 +; MIPS32-NEXT: addu $1, $1, $7 +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: addu $5, $1, $5 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/add_vec.ll @@ -4,9 +4,9 @@ define void 
@add_v16i8(<16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %c) { ; P5600-LABEL: add_v16i8: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.b $w0, 0($4) -; P5600-NEXT: ld.b $w1, 0($5) -; P5600-NEXT: addv.b $w0, $w1, $w0 +; P5600-NEXT: ld.b $w1, 0($4) +; P5600-NEXT: ld.b $w0, 0($5) +; P5600-NEXT: addv.b $w0, $w0, $w1 ; P5600-NEXT: st.b $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -21,9 +21,9 @@ define void @add_v8i16(<8 x i16>* %a, <8 x i16>* %b, <8 x i16>* %c) { ; P5600-LABEL: add_v8i16: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.h $w0, 0($4) -; P5600-NEXT: ld.h $w1, 0($5) -; P5600-NEXT: addv.h $w0, $w1, $w0 +; P5600-NEXT: ld.h $w1, 0($4) +; P5600-NEXT: ld.h $w0, 0($5) +; P5600-NEXT: addv.h $w0, $w0, $w1 ; P5600-NEXT: st.h $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -38,9 +38,9 @@ define void @add_v4i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c) { ; P5600-LABEL: add_v4i32: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.w $w0, 0($4) -; P5600-NEXT: ld.w $w1, 0($5) -; P5600-NEXT: addv.w $w0, $w1, $w0 +; P5600-NEXT: ld.w $w1, 0($4) +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: addv.w $w0, $w0, $w1 ; P5600-NEXT: st.w $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -55,9 +55,9 @@ define void @add_v2i64(<2 x i64>* %a, <2 x i64>* %b, <2 x i64>* %c) { ; P5600-LABEL: add_v2i64: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.d $w0, 0($4) -; P5600-NEXT: ld.d $w1, 0($5) -; P5600-NEXT: addv.d $w0, $w1, $w0 +; P5600-NEXT: ld.d $w1, 0($4) +; P5600-NEXT: ld.d $w0, 0($5) +; P5600-NEXT: addv.d $w0, $w0, $w1 ; P5600-NEXT: st.d $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/aggregate_struct_return.ll @@ -6,10 +6,10 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lwc1 $f0, 0($4) ; MIPS32-NEXT: lwc1 $f1, 4($4) -; 
MIPS32-NEXT: lwc1 $f2, 0($5) -; MIPS32-NEXT: lwc1 $f3, 4($5) -; MIPS32-NEXT: add.s $f0, $f0, $f2 -; MIPS32-NEXT: add.s $f2, $f1, $f3 +; MIPS32-NEXT: lwc1 $f3, 0($5) +; MIPS32-NEXT: lwc1 $f2, 4($5) +; MIPS32-NEXT: add.s $f0, $f0, $f3 +; MIPS32-NEXT: add.s $f2, $f1, $f2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -33,10 +33,10 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ldc1 $f0, 0($4) ; MIPS32-NEXT: ldc1 $f2, 8($4) -; MIPS32-NEXT: ldc1 $f4, 0($5) -; MIPS32-NEXT: ldc1 $f6, 8($5) -; MIPS32-NEXT: add.d $f0, $f0, $f4 -; MIPS32-NEXT: add.d $f2, $f2, $f6 +; MIPS32-NEXT: ldc1 $f6, 0($5) +; MIPS32-NEXT: ldc1 $f4, 8($5) +; MIPS32-NEXT: add.d $f0, $f0, $f6 +; MIPS32-NEXT: add.d $f2, $f2, $f4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -66,9 +66,9 @@ ; MIPS32-NEXT: sw $4, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: jal ret_complex_float ; MIPS32-NEXT: nop -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: swc1 $f0, 0($1) -; MIPS32-NEXT: swc1 $f2, 4($1) +; MIPS32-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: swc1 $f0, 0($4) +; MIPS32-NEXT: swc1 $f2, 4($4) ; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 24 ; MIPS32-NEXT: jr $ra @@ -95,9 +95,9 @@ ; MIPS32-NEXT: sw $4, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: jal ret_complex_double ; MIPS32-NEXT: nop -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sdc1 $f0, 0($1) -; MIPS32-NEXT: sdc1 $f2, 8($1) +; MIPS32-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sdc1 $f0, 0($4) +; MIPS32-NEXT: sdc1 $f2, 8($4) ; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 24 ; MIPS32-NEXT: jr $ra Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitreverse.ll @@ -6,64 +6,64 @@ define i32 @bitreverse_i32(i32 signext 
%a) { ; MIPS32-LABEL: bitreverse_i32: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sll $1, $4, 24 -; MIPS32-NEXT: srl $2, $4, 24 -; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 24 +; MIPS32-NEXT: srl $1, $4, 24 +; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: andi $2, $4, 65280 ; MIPS32-NEXT: sll $2, $2, 8 ; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: srl $2, $4, 8 ; MIPS32-NEXT: andi $2, $2, 65280 -; MIPS32-NEXT: or $1, $1, $2 -; MIPS32-NEXT: lui $2, 61680 -; MIPS32-NEXT: ori $2, $2, 61680 -; MIPS32-NEXT: and $3, $1, $2 -; MIPS32-NEXT: srl $3, $3, 4 -; MIPS32-NEXT: sll $1, $1, 4 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: or $1, $3, $1 -; MIPS32-NEXT: lui $2, 52428 -; MIPS32-NEXT: ori $2, $2, 52428 -; MIPS32-NEXT: and $3, $1, $2 -; MIPS32-NEXT: srl $3, $3, 2 -; MIPS32-NEXT: sll $1, $1, 2 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: or $1, $3, $1 -; MIPS32-NEXT: lui $2, 43690 -; MIPS32-NEXT: ori $2, $2, 43690 -; MIPS32-NEXT: and $3, $1, $2 -; MIPS32-NEXT: srl $3, $3, 1 -; MIPS32-NEXT: sll $1, $1, 1 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: or $2, $3, $1 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 61680 +; MIPS32-NEXT: ori $3, $1, 61680 +; MIPS32-NEXT: and $1, $2, $3 +; MIPS32-NEXT: srl $1, $1, 4 +; MIPS32-NEXT: sll $2, $2, 4 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 52428 +; MIPS32-NEXT: ori $3, $1, 52428 +; MIPS32-NEXT: and $1, $2, $3 +; MIPS32-NEXT: srl $1, $1, 2 +; MIPS32-NEXT: sll $2, $2, 2 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 43690 +; MIPS32-NEXT: ori $3, $1, 43690 +; MIPS32-NEXT: and $1, $2, $3 +; MIPS32-NEXT: srl $1, $1, 1 +; MIPS32-NEXT: sll $2, $2, 1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: or $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R2-LABEL: bitreverse_i32: ; MIPS32R2: # %bb.0: # %entry ; MIPS32R2-NEXT: wsbh $1, $4 -; MIPS32R2-NEXT: rotr $1, $1, 16 -; MIPS32R2-NEXT: lui $2, 61680 -; MIPS32R2-NEXT: ori $2, $2, 61680 
-; MIPS32R2-NEXT: and $3, $1, $2 -; MIPS32R2-NEXT: srl $3, $3, 4 -; MIPS32R2-NEXT: sll $1, $1, 4 -; MIPS32R2-NEXT: and $1, $1, $2 -; MIPS32R2-NEXT: or $1, $3, $1 -; MIPS32R2-NEXT: lui $2, 52428 -; MIPS32R2-NEXT: ori $2, $2, 52428 -; MIPS32R2-NEXT: and $3, $1, $2 -; MIPS32R2-NEXT: srl $3, $3, 2 -; MIPS32R2-NEXT: sll $1, $1, 2 -; MIPS32R2-NEXT: and $1, $1, $2 -; MIPS32R2-NEXT: or $1, $3, $1 -; MIPS32R2-NEXT: lui $2, 43690 -; MIPS32R2-NEXT: ori $2, $2, 43690 -; MIPS32R2-NEXT: and $3, $1, $2 -; MIPS32R2-NEXT: srl $3, $3, 1 -; MIPS32R2-NEXT: sll $1, $1, 1 -; MIPS32R2-NEXT: and $1, $1, $2 -; MIPS32R2-NEXT: or $2, $3, $1 +; MIPS32R2-NEXT: rotr $2, $1, 16 +; MIPS32R2-NEXT: lui $1, 61680 +; MIPS32R2-NEXT: ori $3, $1, 61680 +; MIPS32R2-NEXT: and $1, $2, $3 +; MIPS32R2-NEXT: srl $1, $1, 4 +; MIPS32R2-NEXT: sll $2, $2, 4 +; MIPS32R2-NEXT: and $2, $2, $3 +; MIPS32R2-NEXT: or $2, $1, $2 +; MIPS32R2-NEXT: lui $1, 52428 +; MIPS32R2-NEXT: ori $3, $1, 52428 +; MIPS32R2-NEXT: and $1, $2, $3 +; MIPS32R2-NEXT: srl $1, $1, 2 +; MIPS32R2-NEXT: sll $2, $2, 2 +; MIPS32R2-NEXT: and $2, $2, $3 +; MIPS32R2-NEXT: or $2, $1, $2 +; MIPS32R2-NEXT: lui $1, 43690 +; MIPS32R2-NEXT: ori $3, $1, 43690 +; MIPS32R2-NEXT: and $1, $2, $3 +; MIPS32R2-NEXT: srl $1, $1, 1 +; MIPS32R2-NEXT: sll $2, $2, 1 +; MIPS32R2-NEXT: and $2, $2, $3 +; MIPS32R2-NEXT: or $2, $1, $2 ; MIPS32R2-NEXT: jr $ra ; MIPS32R2-NEXT: nop entry: @@ -75,107 +75,107 @@ define i64 @bitreverse_i64(i64 signext %a) { ; MIPS32-LABEL: bitreverse_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sll $1, $5, 24 -; MIPS32-NEXT: srl $2, $5, 24 -; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: move $3, $4 +; MIPS32-NEXT: sll $2, $5, 24 +; MIPS32-NEXT: srl $1, $5, 24 +; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: andi $2, $5, 65280 ; MIPS32-NEXT: sll $2, $2, 8 ; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: srl $2, $5, 8 ; MIPS32-NEXT: andi $2, $2, 65280 -; MIPS32-NEXT: or $1, $1, $2 -; MIPS32-NEXT: lui $2, 61680 -; MIPS32-NEXT: ori $2, $2, 61680 -; 
MIPS32-NEXT: and $3, $1, $2 -; MIPS32-NEXT: srl $3, $3, 4 -; MIPS32-NEXT: sll $1, $1, 4 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: or $1, $3, $1 -; MIPS32-NEXT: lui $3, 52428 -; MIPS32-NEXT: ori $3, $3, 52428 -; MIPS32-NEXT: and $5, $1, $3 -; MIPS32-NEXT: srl $5, $5, 2 -; MIPS32-NEXT: sll $1, $1, 2 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: or $1, $5, $1 -; MIPS32-NEXT: lui $5, 43690 -; MIPS32-NEXT: ori $5, $5, 43690 -; MIPS32-NEXT: and $6, $1, $5 -; MIPS32-NEXT: srl $6, $6, 1 -; MIPS32-NEXT: sll $1, $1, 1 -; MIPS32-NEXT: and $1, $1, $5 -; MIPS32-NEXT: or $1, $6, $1 -; MIPS32-NEXT: sll $6, $4, 24 -; MIPS32-NEXT: srl $7, $4, 24 -; MIPS32-NEXT: or $6, $7, $6 -; MIPS32-NEXT: andi $7, $4, 65280 -; MIPS32-NEXT: sll $7, $7, 8 -; MIPS32-NEXT: or $6, $6, $7 -; MIPS32-NEXT: srl $4, $4, 8 -; MIPS32-NEXT: andi $4, $4, 65280 -; MIPS32-NEXT: or $4, $6, $4 -; MIPS32-NEXT: and $6, $4, $2 -; MIPS32-NEXT: srl $6, $6, 4 -; MIPS32-NEXT: sll $4, $4, 4 -; MIPS32-NEXT: and $2, $4, $2 -; MIPS32-NEXT: or $2, $6, $2 -; MIPS32-NEXT: and $4, $2, $3 -; MIPS32-NEXT: srl $4, $4, 2 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 61680 +; MIPS32-NEXT: ori $6, $1, 61680 +; MIPS32-NEXT: and $1, $2, $6 +; MIPS32-NEXT: srl $1, $1, 4 +; MIPS32-NEXT: sll $2, $2, 4 +; MIPS32-NEXT: and $2, $2, $6 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 52428 +; MIPS32-NEXT: ori $5, $1, 52428 +; MIPS32-NEXT: and $1, $2, $5 +; MIPS32-NEXT: srl $1, $1, 2 ; MIPS32-NEXT: sll $2, $2, 2 -; MIPS32-NEXT: and $2, $2, $3 -; MIPS32-NEXT: or $2, $4, $2 -; MIPS32-NEXT: and $3, $2, $5 -; MIPS32-NEXT: srl $3, $3, 1 -; MIPS32-NEXT: sll $2, $2, 1 ; MIPS32-NEXT: and $2, $2, $5 -; MIPS32-NEXT: or $3, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: lui $1, 43690 +; MIPS32-NEXT: ori $4, $1, 43690 +; MIPS32-NEXT: and $1, $2, $4 +; MIPS32-NEXT: srl $1, $1, 1 +; MIPS32-NEXT: sll $2, $2, 1 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: or $2, $1, $2 +; MIPS32-NEXT: sll $7, $3, 24 +; 
MIPS32-NEXT: srl $1, $3, 24 +; MIPS32-NEXT: or $1, $1, $7 +; MIPS32-NEXT: andi $7, $3, 65280 +; MIPS32-NEXT: sll $7, $7, 8 +; MIPS32-NEXT: or $1, $1, $7 +; MIPS32-NEXT: srl $3, $3, 8 +; MIPS32-NEXT: andi $3, $3, 65280 +; MIPS32-NEXT: or $3, $1, $3 +; MIPS32-NEXT: and $1, $3, $6 +; MIPS32-NEXT: srl $1, $1, 4 +; MIPS32-NEXT: sll $3, $3, 4 +; MIPS32-NEXT: and $3, $3, $6 +; MIPS32-NEXT: or $3, $1, $3 +; MIPS32-NEXT: and $1, $3, $5 +; MIPS32-NEXT: srl $1, $1, 2 +; MIPS32-NEXT: sll $3, $3, 2 +; MIPS32-NEXT: and $3, $3, $5 +; MIPS32-NEXT: or $3, $1, $3 +; MIPS32-NEXT: and $1, $3, $4 +; MIPS32-NEXT: srl $1, $1, 1 +; MIPS32-NEXT: sll $3, $3, 1 +; MIPS32-NEXT: and $3, $3, $4 +; MIPS32-NEXT: or $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R2-LABEL: bitreverse_i64: ; MIPS32R2: # %bb.0: # %entry -; MIPS32R2-NEXT: wsbh $1, $5 -; MIPS32R2-NEXT: rotr $1, $1, 16 +; MIPS32R2-NEXT: move $1, $4 +; MIPS32R2-NEXT: wsbh $2, $5 +; MIPS32R2-NEXT: rotr $3, $2, 16 ; MIPS32R2-NEXT: lui $2, 61680 -; MIPS32R2-NEXT: ori $2, $2, 61680 -; MIPS32R2-NEXT: and $3, $1, $2 -; MIPS32R2-NEXT: srl $3, $3, 4 -; MIPS32R2-NEXT: sll $1, $1, 4 -; MIPS32R2-NEXT: and $1, $1, $2 -; MIPS32R2-NEXT: or $1, $3, $1 -; MIPS32R2-NEXT: lui $3, 52428 -; MIPS32R2-NEXT: ori $3, $3, 52428 -; MIPS32R2-NEXT: and $5, $1, $3 -; MIPS32R2-NEXT: srl $5, $5, 2 -; MIPS32R2-NEXT: sll $1, $1, 2 -; MIPS32R2-NEXT: and $1, $1, $3 -; MIPS32R2-NEXT: or $1, $5, $1 -; MIPS32R2-NEXT: lui $5, 43690 -; MIPS32R2-NEXT: ori $5, $5, 43690 -; MIPS32R2-NEXT: and $6, $1, $5 -; MIPS32R2-NEXT: srl $6, $6, 1 -; MIPS32R2-NEXT: sll $1, $1, 1 -; MIPS32R2-NEXT: and $1, $1, $5 -; MIPS32R2-NEXT: or $1, $6, $1 -; MIPS32R2-NEXT: wsbh $4, $4 -; MIPS32R2-NEXT: rotr $4, $4, 16 -; MIPS32R2-NEXT: and $6, $4, $2 -; MIPS32R2-NEXT: srl $6, $6, 4 -; MIPS32R2-NEXT: sll $4, $4, 4 -; MIPS32R2-NEXT: and $2, $4, $2 -; MIPS32R2-NEXT: or $2, $6, $2 -; MIPS32R2-NEXT: and $4, $2, $3 -; MIPS32R2-NEXT: srl $4, $4, 2 -; MIPS32R2-NEXT: sll $2, $2, 2 -; MIPS32R2-NEXT: 
and $2, $2, $3 -; MIPS32R2-NEXT: or $2, $4, $2 -; MIPS32R2-NEXT: and $3, $2, $5 -; MIPS32R2-NEXT: srl $3, $3, 1 -; MIPS32R2-NEXT: sll $2, $2, 1 -; MIPS32R2-NEXT: and $2, $2, $5 -; MIPS32R2-NEXT: or $3, $3, $2 -; MIPS32R2-NEXT: move $2, $1 +; MIPS32R2-NEXT: ori $6, $2, 61680 +; MIPS32R2-NEXT: and $2, $3, $6 +; MIPS32R2-NEXT: srl $2, $2, 4 +; MIPS32R2-NEXT: sll $3, $3, 4 +; MIPS32R2-NEXT: and $3, $3, $6 +; MIPS32R2-NEXT: or $3, $2, $3 +; MIPS32R2-NEXT: lui $2, 52428 +; MIPS32R2-NEXT: ori $5, $2, 52428 +; MIPS32R2-NEXT: and $2, $3, $5 +; MIPS32R2-NEXT: srl $2, $2, 2 +; MIPS32R2-NEXT: sll $3, $3, 2 +; MIPS32R2-NEXT: and $3, $3, $5 +; MIPS32R2-NEXT: or $3, $2, $3 +; MIPS32R2-NEXT: lui $2, 43690 +; MIPS32R2-NEXT: ori $4, $2, 43690 +; MIPS32R2-NEXT: and $2, $3, $4 +; MIPS32R2-NEXT: srl $2, $2, 1 +; MIPS32R2-NEXT: sll $3, $3, 1 +; MIPS32R2-NEXT: and $3, $3, $4 +; MIPS32R2-NEXT: or $2, $2, $3 +; MIPS32R2-NEXT: wsbh $1, $1 +; MIPS32R2-NEXT: rotr $3, $1, 16 +; MIPS32R2-NEXT: and $1, $3, $6 +; MIPS32R2-NEXT: srl $1, $1, 4 +; MIPS32R2-NEXT: sll $3, $3, 4 +; MIPS32R2-NEXT: and $3, $3, $6 +; MIPS32R2-NEXT: or $3, $1, $3 +; MIPS32R2-NEXT: and $1, $3, $5 +; MIPS32R2-NEXT: srl $1, $1, 2 +; MIPS32R2-NEXT: sll $3, $3, 2 +; MIPS32R2-NEXT: and $3, $3, $5 +; MIPS32R2-NEXT: or $3, $1, $3 +; MIPS32R2-NEXT: and $1, $3, $4 +; MIPS32R2-NEXT: srl $1, $1, 1 +; MIPS32R2-NEXT: sll $3, $3, 1 +; MIPS32R2-NEXT: and $3, $3, $4 +; MIPS32R2-NEXT: or $3, $1, $3 ; MIPS32R2-NEXT: jr $ra ; MIPS32R2-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bitwise.ll @@ -320,10 +320,10 @@ ; MIPS32-LABEL: ashr_i8: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 2 -; MIPS32-NEXT: andi $1, $1, 255 -; MIPS32-NEXT: sll $2, $4, 24 -; MIPS32-NEXT: sra $2, $2, 24 -; MIPS32-NEXT: srav $2, $2, $1 +; MIPS32-NEXT: andi $2, 
$1, 255 +; MIPS32-NEXT: sll $1, $4, 24 +; MIPS32-NEXT: sra $1, $1, 24 +; MIPS32-NEXT: srav $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -335,9 +335,9 @@ ; MIPS32-LABEL: lshr_i16: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 2 -; MIPS32-NEXT: andi $1, $1, 65535 -; MIPS32-NEXT: andi $2, $4, 65535 -; MIPS32-NEXT: srlv $2, $2, $1 +; MIPS32-NEXT: andi $2, $1, 65535 +; MIPS32-NEXT: andi $1, $4, 65535 +; MIPS32-NEXT: srlv $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -348,29 +348,25 @@ define i64 @shl_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: shl_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $sp, $sp, -8 -; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: move $3, $4 +; MIPS32-NEXT: move $9, $6 ; MIPS32-NEXT: ori $1, $zero, 32 -; MIPS32-NEXT: subu $2, $6, $1 -; MIPS32-NEXT: subu $3, $1, $6 -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: sltu $1, $6, $1 -; MIPS32-NEXT: sltiu $9, $6, 1 -; MIPS32-NEXT: sllv $10, $4, $6 -; MIPS32-NEXT: srlv $3, $4, $3 -; MIPS32-NEXT: sllv $6, $5, $6 -; MIPS32-NEXT: or $3, $3, $6 -; MIPS32-NEXT: sllv $2, $4, $2 -; MIPS32-NEXT: andi $4, $1, 1 -; MIPS32-NEXT: movn $8, $10, $4 +; MIPS32-NEXT: subu $8, $9, $1 +; MIPS32-NEXT: subu $4, $1, $9 +; MIPS32-NEXT: ori $2, $zero, 0 +; MIPS32-NEXT: sltu $6, $9, $1 +; MIPS32-NEXT: sltiu $1, $9, 1 +; MIPS32-NEXT: sllv $7, $3, $9 +; MIPS32-NEXT: srlv $4, $3, $4 +; MIPS32-NEXT: sllv $9, $5, $9 +; MIPS32-NEXT: or $4, $4, $9 +; MIPS32-NEXT: sllv $3, $3, $8 +; MIPS32-NEXT: andi $8, $6, 1 +; MIPS32-NEXT: movn $2, $7, $8 +; MIPS32-NEXT: andi $6, $6, 1 +; MIPS32-NEXT: movn $3, $4, $6 ; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $2, $3, $1 -; MIPS32-NEXT: andi $1, $9, 1 -; MIPS32-NEXT: movn $2, $5, $1 -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: move $2, $8 -; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: addiu $sp, $sp, 8 +; MIPS32-NEXT: movn $3, $5, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -381,24 
+377,30 @@ define i64 @ashl_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: ashl_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $1, $zero, 32 -; MIPS32-NEXT: subu $2, $6, $1 -; MIPS32-NEXT: subu $3, $1, $6 -; MIPS32-NEXT: sltu $1, $6, $1 -; MIPS32-NEXT: sltiu $8, $6, 1 -; MIPS32-NEXT: srav $9, $5, $6 -; MIPS32-NEXT: srlv $6, $4, $6 -; MIPS32-NEXT: sllv $3, $5, $3 -; MIPS32-NEXT: or $3, $6, $3 -; MIPS32-NEXT: sra $6, $5, 31 -; MIPS32-NEXT: srav $2, $5, $2 -; MIPS32-NEXT: andi $5, $1, 1 -; MIPS32-NEXT: movn $2, $3, $5 -; MIPS32-NEXT: andi $3, $8, 1 -; MIPS32-NEXT: movn $2, $4, $3 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $6, $9, $1 +; MIPS32-NEXT: addiu $sp, $sp, -8 +; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $2, $5 +; MIPS32-NEXT: lw $5, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: move $3, $6 +; MIPS32-NEXT: ori $1, $zero, 32 +; MIPS32-NEXT: subu $8, $3, $1 +; MIPS32-NEXT: subu $7, $1, $3 +; MIPS32-NEXT: sltu $4, $3, $1 +; MIPS32-NEXT: sltiu $6, $3, 1 +; MIPS32-NEXT: srav $1, $2, $3 +; MIPS32-NEXT: srlv $3, $5, $3 +; MIPS32-NEXT: sllv $7, $2, $7 +; MIPS32-NEXT: or $7, $3, $7 +; MIPS32-NEXT: sra $3, $2, 31 +; MIPS32-NEXT: srav $2, $2, $8 +; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: movn $2, $7, $8 +; MIPS32-NEXT: andi $6, $6, 1 +; MIPS32-NEXT: movn $2, $5, $6 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: movn $3, $1, $4 +; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -409,24 +411,30 @@ define i64 @lshr_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: lshr_i64: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: addiu $sp, $sp, -8 +; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $2, $5 +; MIPS32-NEXT: lw $5, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: move $7, $6 ; MIPS32-NEXT: ori $1, $zero, 32 -; MIPS32-NEXT: subu $2, $6, $1 -; MIPS32-NEXT: subu $3, $1, $6 -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: sltu 
$1, $6, $1 -; MIPS32-NEXT: sltiu $9, $6, 1 -; MIPS32-NEXT: srlv $10, $5, $6 -; MIPS32-NEXT: srlv $6, $4, $6 -; MIPS32-NEXT: sllv $3, $5, $3 -; MIPS32-NEXT: or $3, $6, $3 -; MIPS32-NEXT: srlv $2, $5, $2 -; MIPS32-NEXT: andi $5, $1, 1 -; MIPS32-NEXT: movn $2, $3, $5 -; MIPS32-NEXT: andi $3, $9, 1 -; MIPS32-NEXT: movn $2, $4, $3 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $8, $10, $1 -; MIPS32-NEXT: move $3, $8 +; MIPS32-NEXT: subu $8, $7, $1 +; MIPS32-NEXT: subu $9, $1, $7 +; MIPS32-NEXT: ori $3, $zero, 0 +; MIPS32-NEXT: sltu $4, $7, $1 +; MIPS32-NEXT: sltiu $6, $7, 1 +; MIPS32-NEXT: srlv $1, $2, $7 +; MIPS32-NEXT: srlv $7, $5, $7 +; MIPS32-NEXT: sllv $9, $2, $9 +; MIPS32-NEXT: or $7, $7, $9 +; MIPS32-NEXT: srlv $2, $2, $8 +; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: movn $2, $7, $8 +; MIPS32-NEXT: andi $6, $6, 1 +; MIPS32-NEXT: movn $2, $5, $6 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: movn $3, $1, $4 +; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/branch.ll @@ -30,21 +30,21 @@ ; MIPS32: # %bb.0: ; MIPS32-NEXT: addiu $sp, $sp, -8 ; MIPS32-NEXT: .cfi_def_cfa_offset 8 +; MIPS32-NEXT: sw $5, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB1_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: ; MIPS32-NEXT: j $BB1_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_2: # %if.then -; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_3: # %if.else -; MIPS32-NEXT: lw $2, 0($sp) # 
4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/brindirect.ll @@ -6,19 +6,19 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -8 ; MIPS32-NEXT: .cfi_def_cfa_offset 8 -; MIPS32-NEXT: ori $2, $zero, 1 -; MIPS32-NEXT: ori $1, $zero, 0 -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 1 ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: jr $4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_1: # %L1 -; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_2: # %L2 -; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/bswap.ll @@ -6,9 +6,9 @@ define i32 @bswap_i32(i32 %x) { ; MIPS32-LABEL: bswap_i32: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sll $1, $4, 24 -; MIPS32-NEXT: srl $2, $4, 24 -; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: sll $2, $4, 24 +; MIPS32-NEXT: srl $1, $4, 24 +; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: andi $2, $4, 65280 ; MIPS32-NEXT: sll $2, $2, 8 ; MIPS32-NEXT: or $1, $1, $2 @@ -33,18 +33,18 @@ define i64 @bswap_i64(i64 %x) { ; MIPS32-LABEL: bswap_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: 
sll $1, $5, 24 -; MIPS32-NEXT: srl $2, $5, 24 -; MIPS32-NEXT: or $1, $2, $1 +; MIPS32-NEXT: sll $2, $5, 24 +; MIPS32-NEXT: srl $1, $5, 24 +; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: andi $2, $5, 65280 ; MIPS32-NEXT: sll $2, $2, 8 ; MIPS32-NEXT: or $1, $1, $2 ; MIPS32-NEXT: srl $2, $5, 8 ; MIPS32-NEXT: andi $2, $2, 65280 ; MIPS32-NEXT: or $2, $1, $2 -; MIPS32-NEXT: sll $1, $4, 24 -; MIPS32-NEXT: srl $3, $4, 24 -; MIPS32-NEXT: or $1, $3, $1 +; MIPS32-NEXT: sll $3, $4, 24 +; MIPS32-NEXT: srl $1, $4, 24 +; MIPS32-NEXT: or $1, $1, $3 ; MIPS32-NEXT: andi $3, $4, 65280 ; MIPS32-NEXT: sll $3, $3, 8 ; MIPS32-NEXT: or $1, $1, $3 Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/call.ll @@ -29,11 +29,10 @@ ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24 ; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: .cfi_offset 31, -4 -; MIPS32_PIC-NEXT: addu $1, $2, $25 -; MIPS32_PIC-NEXT: lw $25, %call16(f)($1) +; MIPS32_PIC-NEXT: addu $gp, $2, $25 ; MIPS32_PIC-NEXT: move $4, $6 ; MIPS32_PIC-NEXT: move $5, $7 -; MIPS32_PIC-NEXT: move $gp, $1 +; MIPS32_PIC-NEXT: lw $25, %call16(f)($gp) ; MIPS32_PIC-NEXT: jalr $25 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: addu $2, $2, $2 @@ -89,12 +88,11 @@ ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24 ; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: .cfi_offset 31, -4 -; MIPS32_PIC-NEXT: addu $1, $2, $25 -; MIPS32_PIC-NEXT: lw $2, %got(f_with_local_linkage)($1) -; MIPS32_PIC-NEXT: addiu $25, $2, %lo(f_with_local_linkage) +; MIPS32_PIC-NEXT: addu $gp, $2, $25 ; MIPS32_PIC-NEXT: move $4, $6 ; MIPS32_PIC-NEXT: move $5, $7 -; MIPS32_PIC-NEXT: move $gp, $1 +; MIPS32_PIC-NEXT: lw $1, %got(f_with_local_linkage)($gp) +; MIPS32_PIC-NEXT: addiu $25, $1, %lo(f_with_local_linkage) ; MIPS32_PIC-NEXT: jalr $25 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: 
addu $2, $2, $2 @@ -115,10 +113,9 @@ ; MIPS32-NEXT: .cfi_def_cfa_offset 24 ; MIPS32-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 -; MIPS32-NEXT: sw $4, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $25, $4 ; MIPS32-NEXT: move $4, $5 ; MIPS32-NEXT: move $5, $6 -; MIPS32-NEXT: lw $25, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: jalr $25 ; MIPS32-NEXT: nop ; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload @@ -132,10 +129,9 @@ ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24 ; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: .cfi_offset 31, -4 -; MIPS32_PIC-NEXT: sw $4, 16($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: move $25, $4 ; MIPS32_PIC-NEXT: move $4, $5 ; MIPS32_PIC-NEXT: move $5, $6 -; MIPS32_PIC-NEXT: lw $25, 16($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: jalr $25 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctlz.ll @@ -17,14 +17,14 @@ define i64 @ctlz_i64(i64 %a) { ; MIPS32-LABEL: ctlz_i64: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: move $1, $4 ; MIPS32-NEXT: ori $3, $zero, 0 -; MIPS32-NEXT: sltiu $1, $5, 1 -; MIPS32-NEXT: clz $2, $4 -; MIPS32-NEXT: addiu $2, $2, 32 -; MIPS32-NEXT: clz $4, $5 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $4, $2, $1 -; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: sltiu $4, $5, 1 +; MIPS32-NEXT: clz $1, $1 +; MIPS32-NEXT: addiu $1, $1, 32 +; MIPS32-NEXT: clz $2, $5 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: movn $2, $1, $4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll +++ 
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/ctpop.ll @@ -8,15 +8,15 @@ ; MIPS32-NEXT: lui $2, 21845 ; MIPS32-NEXT: ori $2, $2, 21845 ; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: subu $1, $4, $1 -; MIPS32-NEXT: srl $2, $1, 2 +; MIPS32-NEXT: subu $2, $4, $1 +; MIPS32-NEXT: srl $1, $2, 2 ; MIPS32-NEXT: lui $3, 13107 ; MIPS32-NEXT: ori $3, $3, 13107 -; MIPS32-NEXT: and $2, $2, $3 ; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: addu $1, $2, $1 -; MIPS32-NEXT: srl $2, $1, 4 -; MIPS32-NEXT: addu $1, $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: addu $2, $1, $2 +; MIPS32-NEXT: srl $1, $2, 4 +; MIPS32-NEXT: addu $1, $1, $2 ; MIPS32-NEXT: lui $2, 3855 ; MIPS32-NEXT: ori $2, $2, 3855 ; MIPS32-NEXT: and $1, $1, $2 @@ -38,37 +38,37 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: srl $1, $4, 1 ; MIPS32-NEXT: lui $2, 21845 -; MIPS32-NEXT: ori $2, $2, 21845 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: subu $1, $4, $1 -; MIPS32-NEXT: srl $3, $1, 2 -; MIPS32-NEXT: lui $4, 13107 -; MIPS32-NEXT: ori $4, $4, 13107 -; MIPS32-NEXT: and $3, $3, $4 +; MIPS32-NEXT: ori $7, $2, 21845 +; MIPS32-NEXT: and $1, $1, $7 +; MIPS32-NEXT: subu $2, $4, $1 +; MIPS32-NEXT: srl $1, $2, 2 +; MIPS32-NEXT: lui $3, 13107 +; MIPS32-NEXT: ori $6, $3, 13107 +; MIPS32-NEXT: and $1, $1, $6 +; MIPS32-NEXT: and $2, $2, $6 +; MIPS32-NEXT: addu $2, $1, $2 +; MIPS32-NEXT: srl $1, $2, 4 +; MIPS32-NEXT: addu $1, $1, $2 +; MIPS32-NEXT: lui $2, 3855 +; MIPS32-NEXT: ori $4, $2, 3855 ; MIPS32-NEXT: and $1, $1, $4 -; MIPS32-NEXT: addu $1, $3, $1 -; MIPS32-NEXT: srl $3, $1, 4 -; MIPS32-NEXT: addu $1, $3, $1 -; MIPS32-NEXT: lui $3, 3855 -; MIPS32-NEXT: ori $3, $3, 3855 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: lui $6, 257 -; MIPS32-NEXT: ori $6, $6, 257 -; MIPS32-NEXT: mul $1, $1, $6 +; MIPS32-NEXT: lui $2, 257 +; MIPS32-NEXT: ori $3, $2, 257 +; MIPS32-NEXT: mul $1, $1, $3 +; MIPS32-NEXT: srl $2, $1, 24 +; MIPS32-NEXT: srl $1, $5, 1 +; MIPS32-NEXT: and $1, $1, $7 +; MIPS32-NEXT: subu $5, $5, $1 +; MIPS32-NEXT: srl 
$1, $5, 2 +; MIPS32-NEXT: and $1, $1, $6 +; MIPS32-NEXT: and $5, $5, $6 +; MIPS32-NEXT: addu $5, $1, $5 +; MIPS32-NEXT: srl $1, $5, 4 +; MIPS32-NEXT: addu $1, $1, $5 +; MIPS32-NEXT: and $1, $1, $4 +; MIPS32-NEXT: mul $1, $1, $3 ; MIPS32-NEXT: srl $1, $1, 24 -; MIPS32-NEXT: srl $7, $5, 1 -; MIPS32-NEXT: and $2, $7, $2 -; MIPS32-NEXT: subu $2, $5, $2 -; MIPS32-NEXT: srl $5, $2, 2 -; MIPS32-NEXT: and $5, $5, $4 -; MIPS32-NEXT: and $2, $2, $4 -; MIPS32-NEXT: addu $2, $5, $2 -; MIPS32-NEXT: srl $4, $2, 4 -; MIPS32-NEXT: addu $2, $4, $2 -; MIPS32-NEXT: and $2, $2, $3 -; MIPS32-NEXT: mul $2, $2, $6 -; MIPS32-NEXT: srl $2, $2, 24 -; MIPS32-NEXT: addu $2, $2, $1 +; MIPS32-NEXT: addu $2, $1, $2 ; MIPS32-NEXT: ori $3, $zero, 0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/cttz.ll @@ -6,10 +6,10 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: not $1, $4 ; MIPS32-NEXT: addiu $2, $4, -1 -; MIPS32-NEXT: and $1, $1, $2 -; MIPS32-NEXT: ori $2, $zero, 32 -; MIPS32-NEXT: clz $1, $1 -; MIPS32-NEXT: subu $2, $2, $1 +; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: ori $1, $zero, 32 +; MIPS32-NEXT: clz $2, $2 +; MIPS32-NEXT: subu $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -21,23 +21,23 @@ define i64 @cttz_i64(i64 %a) { ; MIPS32-LABEL: cttz_i64: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: move $6, $4 ; MIPS32-NEXT: ori $3, $zero, 0 -; MIPS32-NEXT: sltiu $1, $4, 1 -; MIPS32-NEXT: not $2, $5 -; MIPS32-NEXT: addiu $5, $5, -1 -; MIPS32-NEXT: and $2, $2, $5 -; MIPS32-NEXT: ori $5, $zero, 32 -; MIPS32-NEXT: clz $2, $2 -; MIPS32-NEXT: subu $2, $5, $2 -; MIPS32-NEXT: addiu $2, $2, 32 -; MIPS32-NEXT: not $6, $4 -; MIPS32-NEXT: addiu $4, $4, -1 -; MIPS32-NEXT: and $4, $6, $4 -; MIPS32-NEXT: clz $4, $4 -; MIPS32-NEXT: subu $4, $5, $4 -; MIPS32-NEXT: andi $1, $1, 1 -; 
MIPS32-NEXT: movn $4, $2, $1 -; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: sltiu $4, $6, 1 +; MIPS32-NEXT: not $1, $5 +; MIPS32-NEXT: addiu $2, $5, -1 +; MIPS32-NEXT: and $1, $1, $2 +; MIPS32-NEXT: ori $2, $zero, 32 +; MIPS32-NEXT: clz $1, $1 +; MIPS32-NEXT: subu $1, $2, $1 +; MIPS32-NEXT: addiu $1, $1, 32 +; MIPS32-NEXT: not $5, $6 +; MIPS32-NEXT: addiu $6, $6, -1 +; MIPS32-NEXT: and $5, $5, $6 +; MIPS32-NEXT: clz $5, $5 +; MIPS32-NEXT: subu $2, $2, $5 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: movn $2, $1, $4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -53,10 +53,10 @@ ; MIPS32-NEXT: ori $1, $zero, 0 ; MIPS32-NEXT: not $2, $4 ; MIPS32-NEXT: addiu $3, $4, -1 -; MIPS32-NEXT: and $2, $2, $3 -; MIPS32-NEXT: ori $3, $zero, 32 -; MIPS32-NEXT: clz $2, $2 -; MIPS32-NEXT: subu $2, $3, $2 +; MIPS32-NEXT: and $3, $2, $3 +; MIPS32-NEXT: ori $2, $zero, 32 +; MIPS32-NEXT: clz $3, $3 +; MIPS32-NEXT: subu $2, $2, $3 ; MIPS32-NEXT: addiu $2, $2, 1 ; MIPS32-NEXT: sltiu $3, $4, 1 ; MIPS32-NEXT: andi $3, $3, 1 @@ -74,37 +74,35 @@ define i64 @ffs_i64_expansion(i64 %a) { ; MIPS32-LABEL: ffs_i64_expansion: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $1, $zero, 1 -; MIPS32-NEXT: ori $2, $zero, 0 -; MIPS32-NEXT: sltiu $3, $4, 1 -; MIPS32-NEXT: not $6, $5 -; MIPS32-NEXT: addiu $7, $5, -1 -; MIPS32-NEXT: and $6, $6, $7 -; MIPS32-NEXT: ori $7, $zero, 32 +; MIPS32-NEXT: ori $3, $zero, 1 +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: sltiu $7, $4, 1 +; MIPS32-NEXT: not $2, $5 +; MIPS32-NEXT: addiu $6, $5, -1 +; MIPS32-NEXT: and $6, $2, $6 +; MIPS32-NEXT: ori $2, $zero, 32 ; MIPS32-NEXT: clz $6, $6 -; MIPS32-NEXT: subu $6, $7, $6 +; MIPS32-NEXT: subu $6, $2, $6 ; MIPS32-NEXT: addiu $6, $6, 32 ; MIPS32-NEXT: not $8, $4 ; MIPS32-NEXT: addiu $9, $4, -1 ; MIPS32-NEXT: and $8, $8, $9 ; MIPS32-NEXT: clz $8, $8 -; MIPS32-NEXT: subu $7, $7, $8 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $7, $6, $3 -; MIPS32-NEXT: addiu $3, $7, 1 -; MIPS32-NEXT: sltu $1, $3, $1 -; MIPS32-NEXT: 
addiu $6, $2, 0 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: addu $1, $6, $1 +; MIPS32-NEXT: subu $2, $2, $8 +; MIPS32-NEXT: andi $7, $7, 1 +; MIPS32-NEXT: movn $2, $6, $7 +; MIPS32-NEXT: addiu $2, $2, 1 +; MIPS32-NEXT: sltu $6, $2, $3 +; MIPS32-NEXT: addiu $3, $1, 0 +; MIPS32-NEXT: andi $6, $6, 1 +; MIPS32-NEXT: addu $3, $3, $6 ; MIPS32-NEXT: xori $4, $4, 0 ; MIPS32-NEXT: xori $5, $5, 0 ; MIPS32-NEXT: or $4, $4, $5 ; MIPS32-NEXT: sltiu $4, $4, 1 ; MIPS32-NEXT: andi $4, $4, 1 -; MIPS32-NEXT: movn $3, $2, $4 -; MIPS32-NEXT: movn $1, $2, $4 -; MIPS32-NEXT: move $2, $3 -; MIPS32-NEXT: move $3, $1 +; MIPS32-NEXT: movn $2, $1, $4 +; MIPS32-NEXT: movn $3, $1, $4 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/dyn_stackalloc.ll @@ -15,35 +15,32 @@ ; MIPS32-NEXT: .cfi_offset 30, -8 ; MIPS32-NEXT: move $fp, $sp ; MIPS32-NEXT: .cfi_def_cfa_register 30 -; MIPS32-NEXT: ori $1, $zero, 1 -; MIPS32-NEXT: ori $2, $zero, 0 -; MIPS32-NEXT: addiu $3, $5, 1 -; MIPS32-NEXT: mul $1, $3, $1 +; MIPS32-NEXT: sw $4, 8($fp) # 4-byte Folded Spill +; MIPS32-NEXT: move $6, $5 +; MIPS32-NEXT: lw $5, 8($fp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $6, 12($fp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 1 +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: sw $1, 16($fp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $6, 1 +; MIPS32-NEXT: mul $1, $1, $2 ; MIPS32-NEXT: addiu $1, $1, 7 -; MIPS32-NEXT: addiu $3, $zero, 65528 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: move $3, $sp -; MIPS32-NEXT: subu $1, $3, $1 -; MIPS32-NEXT: move $sp, $1 -; MIPS32-NEXT: addiu $sp, $sp, -16 +; MIPS32-NEXT: addiu $2, $zero, 65528 +; MIPS32-NEXT: and $2, $1, $2 +; MIPS32-NEXT: move $1, $sp +; MIPS32-NEXT: subu $4, $1, $2 ; MIPS32-NEXT: sw $4, 20($fp) # 4-byte 
Folded Spill -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: lw $3, 20($fp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $5, 16($fp) # 4-byte Folded Spill -; MIPS32-NEXT: move $5, $3 -; MIPS32-NEXT: lw $6, 16($fp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $2, 12($fp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $1, 8($fp) # 4-byte Folded Spill +; MIPS32-NEXT: move $sp, $4 +; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: jal memset ; MIPS32-NEXT: nop +; MIPS32-NEXT: lw $5, 12($fp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 16($fp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $4, 20($fp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 16 -; MIPS32-NEXT: lw $1, 8($fp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 16($fp) # 4-byte Folded Reload -; MIPS32-NEXT: addu $3, $1, $2 -; MIPS32-NEXT: lw $4, 12($fp) # 4-byte Folded Reload -; MIPS32-NEXT: sb $4, 0($3) +; MIPS32-NEXT: addu $2, $4, $5 +; MIPS32-NEXT: sb $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, -16 -; MIPS32-NEXT: move $4, $1 ; MIPS32-NEXT: jal puts ; MIPS32-NEXT: nop ; MIPS32-NEXT: addiu $sp, $sp, 16 Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fcmp.ll @@ -27,10 +27,9 @@ define i1 @uno_s(float %x, float %y) { ; MIPS32-LABEL: uno_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.un.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -40,10 +39,9 @@ define i1 @ord_s(float %x, float %y) { ; MIPS32-LABEL: ord_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.un.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra 
; MIPS32-NEXT: nop entry: @@ -55,10 +53,9 @@ define i1 @oeq_s(float %x, float %y) { ; MIPS32-LABEL: oeq_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.eq.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -68,10 +65,9 @@ define i1 @une_s(float %x, float %y) { ; MIPS32-LABEL: une_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.eq.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -83,10 +79,9 @@ define i1 @ueq_s(float %x, float %y) { ; MIPS32-LABEL: ueq_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ueq.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -96,10 +91,9 @@ define i1 @one_s(float %x, float %y) { ; MIPS32-LABEL: one_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ueq.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -111,10 +105,9 @@ define i1 @olt_s(float %x, float %y) { ; MIPS32-LABEL: olt_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.olt.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -124,10 +117,9 @@ define i1 @uge_s(float %x, float %y) { ; MIPS32-LABEL: uge_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; 
MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.olt.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -139,10 +131,9 @@ define i1 @ult_s(float %x, float %y) { ; MIPS32-LABEL: ult_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ult.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -152,10 +143,9 @@ define i1 @oge_s(float %x, float %y) { ; MIPS32-LABEL: oge_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ult.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -167,10 +157,9 @@ define i1 @ole_s(float %x, float %y) { ; MIPS32-LABEL: ole_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ole.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -180,10 +169,9 @@ define i1 @ugt_s(float %x, float %y) { ; MIPS32-LABEL: ugt_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ole.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -195,10 +183,9 @@ define i1 @ule_s(float %x, float %y) { ; MIPS32-LABEL: ule_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ule.s $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, 
$fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -208,10 +195,9 @@ define i1 @ogt_s(float %x, float %y) { ; MIPS32-LABEL: ogt_s: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ule.s $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -245,10 +231,9 @@ define i1 @uno_d(double %x, double %y) { ; MIPS32-LABEL: uno_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.un.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -258,10 +243,9 @@ define i1 @ord_d(double %x, double %y) { ; MIPS32-LABEL: ord_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.un.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -273,10 +257,9 @@ define i1 @oeq_d(double %x, double %y) { ; MIPS32-LABEL: oeq_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.eq.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -286,10 +269,9 @@ define i1 @une_d(double %x, double %y) { ; MIPS32-LABEL: une_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.eq.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -301,10 +283,9 @@ define i1 @ueq_d(double %x, double %y) { ; MIPS32-LABEL: ueq_d: ; MIPS32: # %bb.0: # 
%entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ueq.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -314,10 +295,9 @@ define i1 @one_d(double %x, double %y) { ; MIPS32-LABEL: one_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ueq.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -329,10 +309,9 @@ define i1 @olt_d(double %x, double %y) { ; MIPS32-LABEL: olt_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.olt.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -342,10 +321,9 @@ define i1 @uge_d(double %x, double %y) { ; MIPS32-LABEL: uge_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.olt.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -357,10 +335,9 @@ define i1 @ult_d(double %x, double %y) { ; MIPS32-LABEL: ult_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ult.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -370,10 +347,9 @@ define i1 @oge_d(double %x, double %y) { ; MIPS32-LABEL: oge_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ult.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; 
MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -385,10 +361,9 @@ define i1 @ole_d(double %x, double %y) { ; MIPS32-LABEL: ole_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ole.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -398,10 +373,9 @@ define i1 @ugt_d(double %x, double %y) { ; MIPS32-LABEL: ugt_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ole.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -413,10 +387,9 @@ define i1 @ule_d(double %x, double %y) { ; MIPS32-LABEL: ule_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ule.d $f12, $f14 -; MIPS32-NEXT: movf $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movf $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -426,10 +399,9 @@ define i1 @ogt_d(double %x, double %y) { ; MIPS32-LABEL: ogt_d: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $1, $zero, 1 +; MIPS32-NEXT: addiu $2, $zero, 1 ; MIPS32-NEXT: c.ule.d $f12, $f14 -; MIPS32-NEXT: movt $1, $zero, $fcc0 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: movt $2, $zero, $fcc0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/float_constants.ll @@ -18,22 +18,22 @@ ; FP32-LABEL: e_double_precision: ; FP32: # %bb.0: # %entry ; FP32-NEXT: lui $1, 16389 -; FP32-NEXT: ori $1, $1, 48906 -; 
FP32-NEXT: lui $2, 35604 -; FP32-NEXT: ori $2, $2, 22377 -; FP32-NEXT: mtc1 $2, $f0 -; FP32-NEXT: mtc1 $1, $f1 +; FP32-NEXT: ori $2, $1, 48906 +; FP32-NEXT: lui $1, 35604 +; FP32-NEXT: ori $1, $1, 22377 +; FP32-NEXT: mtc1 $1, $f0 +; FP32-NEXT: mtc1 $2, $f1 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop ; ; FP64-LABEL: e_double_precision: ; FP64: # %bb.0: # %entry ; FP64-NEXT: lui $1, 16389 -; FP64-NEXT: ori $1, $1, 48906 -; FP64-NEXT: lui $2, 35604 -; FP64-NEXT: ori $2, $2, 22377 -; FP64-NEXT: mtc1 $2, $f0 -; FP64-NEXT: mthc1 $1, $f0 +; FP64-NEXT: ori $2, $1, 48906 +; FP64-NEXT: lui $1, 35604 +; FP64-NEXT: ori $1, $1, 22377 +; FP64-NEXT: mtc1 $1, $f0 +; FP64-NEXT: mthc1 $2, $f0 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll @@ -164,20 +164,20 @@ ; MIPS32-LABEL: f32tou16: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: trunc.w.s $f0, $f12 -; MIPS32-NEXT: mfc1 $1, $f0 -; MIPS32-NEXT: lui $2, 20224 -; MIPS32-NEXT: mtc1 $2, $f0 +; MIPS32-NEXT: mfc1 $2, $f0 +; MIPS32-NEXT: lui $1, 20224 +; MIPS32-NEXT: mtc1 $1, $f0 ; MIPS32-NEXT: sub.s $f1, $f12, $f0 ; MIPS32-NEXT: trunc.w.s $f1, $f1 -; MIPS32-NEXT: mfc1 $2, $f1 +; MIPS32-NEXT: mfc1 $1, $f1 ; MIPS32-NEXT: lui $3, 32768 -; MIPS32-NEXT: xor $2, $2, $3 +; MIPS32-NEXT: xor $1, $1, $3 ; MIPS32-NEXT: addiu $3, $zero, 1 ; MIPS32-NEXT: c.ult.s $f12, $f0 ; MIPS32-NEXT: movf $3, $zero, $fcc0 ; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 -; MIPS32-NEXT: andi $2, $2, 65535 +; MIPS32-NEXT: movn $1, $2, $3 +; MIPS32-NEXT: andi $2, $1, 65535 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -189,20 +189,20 @@ ; MIPS32-LABEL: f32tou8: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: trunc.w.s $f0, $f12 -; MIPS32-NEXT: mfc1 $1, $f0 -; MIPS32-NEXT: lui $2, 20224 -; MIPS32-NEXT: 
mtc1 $2, $f0 +; MIPS32-NEXT: mfc1 $2, $f0 +; MIPS32-NEXT: lui $1, 20224 +; MIPS32-NEXT: mtc1 $1, $f0 ; MIPS32-NEXT: sub.s $f1, $f12, $f0 ; MIPS32-NEXT: trunc.w.s $f1, $f1 -; MIPS32-NEXT: mfc1 $2, $f1 +; MIPS32-NEXT: mfc1 $1, $f1 ; MIPS32-NEXT: lui $3, 32768 -; MIPS32-NEXT: xor $2, $2, $3 +; MIPS32-NEXT: xor $1, $1, $3 ; MIPS32-NEXT: addiu $3, $zero, 1 ; MIPS32-NEXT: c.ult.s $f12, $f0 ; MIPS32-NEXT: movf $3, $zero, $fcc0 ; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: movn $2, $1, $3 -; MIPS32-NEXT: andi $2, $2, 255 +; MIPS32-NEXT: movn $1, $2, $3 +; MIPS32-NEXT: andi $2, $1, 255 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -233,10 +233,10 @@ ; FP32: # %bb.0: # %entry ; FP32-NEXT: trunc.w.d $f0, $f12 ; FP32-NEXT: mfc1 $1, $f0 -; FP32-NEXT: lui $2, 16864 -; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: lui $3, 16864 +; FP32-NEXT: ori $2, $zero, 0 +; FP32-NEXT: mtc1 $2, $f0 +; FP32-NEXT: mtc1 $3, $f1 ; FP32-NEXT: sub.d $f2, $f12, $f0 ; FP32-NEXT: trunc.w.d $f2, $f2 ; FP32-NEXT: mfc1 $2, $f2 @@ -254,10 +254,10 @@ ; FP64: # %bb.0: # %entry ; FP64-NEXT: trunc.w.d $f0, $f12 ; FP64-NEXT: mfc1 $1, $f0 -; FP64-NEXT: lui $2, 16864 -; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: lui $3, 16864 +; FP64-NEXT: ori $2, $zero, 0 +; FP64-NEXT: mtc1 $2, $f0 +; FP64-NEXT: mthc1 $3, $f0 ; FP64-NEXT: sub.d $f1, $f12, $f0 ; FP64-NEXT: trunc.w.d $f1, $f1 ; FP64-NEXT: mfc1 $2, $f1 @@ -279,44 +279,44 @@ ; FP32-LABEL: f64tou16: ; FP32: # %bb.0: # %entry ; FP32-NEXT: trunc.w.d $f0, $f12 -; FP32-NEXT: mfc1 $1, $f0 -; FP32-NEXT: lui $2, 16864 -; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: mfc1 $2, $f0 +; FP32-NEXT: lui $3, 16864 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f0 +; FP32-NEXT: mtc1 $3, $f1 ; FP32-NEXT: sub.d $f2, $f12, $f0 ; FP32-NEXT: trunc.w.d $f2, $f2 -; FP32-NEXT: mfc1 $2, $f2 +; FP32-NEXT: mfc1 $1, $f2 ; 
FP32-NEXT: lui $3, 32768 -; FP32-NEXT: xor $2, $2, $3 +; FP32-NEXT: xor $1, $1, $3 ; FP32-NEXT: addiu $3, $zero, 1 ; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 -; FP32-NEXT: movn $2, $1, $3 -; FP32-NEXT: andi $2, $2, 65535 +; FP32-NEXT: movn $1, $2, $3 +; FP32-NEXT: andi $2, $1, 65535 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop ; ; FP64-LABEL: f64tou16: ; FP64: # %bb.0: # %entry ; FP64-NEXT: trunc.w.d $f0, $f12 -; FP64-NEXT: mfc1 $1, $f0 -; FP64-NEXT: lui $2, 16864 -; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: mfc1 $2, $f0 +; FP64-NEXT: lui $3, 16864 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f0 +; FP64-NEXT: mthc1 $3, $f0 ; FP64-NEXT: sub.d $f1, $f12, $f0 ; FP64-NEXT: trunc.w.d $f1, $f1 -; FP64-NEXT: mfc1 $2, $f1 +; FP64-NEXT: mfc1 $1, $f1 ; FP64-NEXT: lui $3, 32768 -; FP64-NEXT: xor $2, $2, $3 +; FP64-NEXT: xor $1, $1, $3 ; FP64-NEXT: addiu $3, $zero, 1 ; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 -; FP64-NEXT: movn $2, $1, $3 -; FP64-NEXT: andi $2, $2, 65535 +; FP64-NEXT: movn $1, $2, $3 +; FP64-NEXT: andi $2, $1, 65535 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop entry: @@ -328,44 +328,44 @@ ; FP32-LABEL: f64tou8: ; FP32: # %bb.0: # %entry ; FP32-NEXT: trunc.w.d $f0, $f12 -; FP32-NEXT: mfc1 $1, $f0 -; FP32-NEXT: lui $2, 16864 -; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f0 -; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: mfc1 $2, $f0 +; FP32-NEXT: lui $3, 16864 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f0 +; FP32-NEXT: mtc1 $3, $f1 ; FP32-NEXT: sub.d $f2, $f12, $f0 ; FP32-NEXT: trunc.w.d $f2, $f2 -; FP32-NEXT: mfc1 $2, $f2 +; FP32-NEXT: mfc1 $1, $f2 ; FP32-NEXT: lui $3, 32768 -; FP32-NEXT: xor $2, $2, $3 +; FP32-NEXT: xor $1, $1, $3 ; FP32-NEXT: addiu $3, $zero, 1 ; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 -; FP32-NEXT: movn $2, $1, $3 -; FP32-NEXT: 
andi $2, $2, 255 +; FP32-NEXT: movn $1, $2, $3 +; FP32-NEXT: andi $2, $1, 255 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop ; ; FP64-LABEL: f64tou8: ; FP64: # %bb.0: # %entry ; FP64-NEXT: trunc.w.d $f0, $f12 -; FP64-NEXT: mfc1 $1, $f0 -; FP64-NEXT: lui $2, 16864 -; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f0 -; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: mfc1 $2, $f0 +; FP64-NEXT: lui $3, 16864 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f0 +; FP64-NEXT: mthc1 $3, $f0 ; FP64-NEXT: sub.d $f1, $f12, $f0 ; FP64-NEXT: trunc.w.d $f1, $f1 -; FP64-NEXT: mfc1 $2, $f1 +; FP64-NEXT: mfc1 $1, $f1 ; FP64-NEXT: lui $3, 32768 -; FP64-NEXT: xor $2, $2, $3 +; FP64-NEXT: xor $1, $1, $3 ; FP64-NEXT: addiu $3, $zero, 1 ; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 -; FP64-NEXT: movn $2, $1, $3 -; FP64-NEXT: andi $2, $2, 255 +; FP64-NEXT: movn $1, $2, $3 +; FP64-NEXT: andi $2, $1, 255 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address.ll @@ -14,12 +14,11 @@ ; MIPS32-NEXT: addiu $4, $1, %lo($.str) ; MIPS32-NEXT: lui $1, 18838 ; MIPS32-NEXT: ori $5, $1, 722 -; MIPS32-NEXT: ori $2, $zero, 0 -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: jal printf ; MIPS32-NEXT: nop -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 24 ; MIPS32-NEXT: jr $ra Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll =================================================================== --- 
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/global_address_pic.ll @@ -23,9 +23,8 @@ ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24 ; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: .cfi_offset 31, -4 -; MIPS32_PIC-NEXT: addu $1, $2, $25 -; MIPS32_PIC-NEXT: lw $25, %call16(f)($1) -; MIPS32_PIC-NEXT: move $gp, $1 +; MIPS32_PIC-NEXT: addu $gp, $2, $25 +; MIPS32_PIC-NEXT: lw $25, %call16(f)($gp) ; MIPS32_PIC-NEXT: jalr $25 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload @@ -46,10 +45,9 @@ ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 24 ; MIPS32_PIC-NEXT: sw $ra, 20($sp) # 4-byte Folded Spill ; MIPS32_PIC-NEXT: .cfi_offset 31, -4 -; MIPS32_PIC-NEXT: addu $1, $2, $25 -; MIPS32_PIC-NEXT: lw $2, %got(f_with_local_linkage)($1) -; MIPS32_PIC-NEXT: addiu $25, $2, %lo(f_with_local_linkage) -; MIPS32_PIC-NEXT: move $gp, $1 +; MIPS32_PIC-NEXT: addu $gp, $2, $25 +; MIPS32_PIC-NEXT: lw $1, %got(f_with_local_linkage)($gp) +; MIPS32_PIC-NEXT: addiu $25, $1, %lo(f_with_local_linkage) ; MIPS32_PIC-NEXT: jalr $25 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: lw $ra, 20($sp) # 4-byte Folded Reload Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/icmp.ll @@ -188,13 +188,12 @@ define i1 @sgt_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: sgt_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: slt $1, $7, $5 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $6, $4 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: slt $2, $7, $5 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $6, $4 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop 
entry: @@ -206,14 +205,13 @@ ; MIPS32-LABEL: sge_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: slt $1, $5, $7 +; MIPS32-NEXT: xori $2, $1, 1 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $4, $6 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $4, $6 -; MIPS32-NEXT: xori $3, $3, 1 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -224,13 +222,12 @@ define i1 @slt_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: slt_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: slt $1, $5, $7 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $4, $6 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: slt $2, $5, $7 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $4, $6 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -242,14 +239,13 @@ ; MIPS32-LABEL: sle_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: slt $1, $7, $5 +; MIPS32-NEXT: xori $2, $1, 1 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $6, $4 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $6, $4 -; MIPS32-NEXT: xori $3, $3, 1 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -260,13 +256,12 @@ define i1 @ugt_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: ugt_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sltu $1, $7, $5 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $6, $4 -; 
MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: sltu $2, $7, $5 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $6, $4 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -278,14 +273,13 @@ ; MIPS32-LABEL: uge_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sltu $1, $5, $7 +; MIPS32-NEXT: xori $2, $1, 1 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $4, $6 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $4, $6 -; MIPS32-NEXT: xori $3, $3, 1 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -296,13 +290,12 @@ define i1 @ult_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: ult_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sltu $1, $5, $7 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $4, $6 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: sltu $2, $5, $7 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $4, $6 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -314,14 +307,13 @@ ; MIPS32-LABEL: ule_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sltu $1, $7, $5 +; MIPS32-NEXT: xori $2, $1, 1 +; MIPS32-NEXT: xor $1, $5, $7 +; MIPS32-NEXT: sltiu $3, $1, 1 +; MIPS32-NEXT: sltu $1, $6, $4 ; MIPS32-NEXT: xori $1, $1, 1 -; MIPS32-NEXT: xor $2, $5, $7 -; MIPS32-NEXT: sltiu $2, $2, 1 -; MIPS32-NEXT: sltu $3, $6, $4 -; MIPS32-NEXT: xori $3, $3, 1 -; MIPS32-NEXT: andi $2, $2, 1 -; MIPS32-NEXT: movn $1, $3, $2 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: andi $3, $3, 1 +; 
MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/jump_table_and_brjt.ll @@ -7,35 +7,35 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -32 ; MIPS32-NEXT: .cfi_def_cfa_offset 32 +; MIPS32-NEXT: sw $4, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: ori $1, $zero, 7 ; MIPS32-NEXT: ori $2, $zero, 3 -; MIPS32-NEXT: ori $3, $zero, 2 -; MIPS32-NEXT: ori $5, $zero, 1 -; MIPS32-NEXT: ori $6, $zero, 0 -; MIPS32-NEXT: addiu $7, $zero, 65535 -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: subu $8, $4, $8 -; MIPS32-NEXT: sltu $1, $1, $8 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: sw $4, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 2 +; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 1 +; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 0 +; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $2, $zero, 65535 ; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $8, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 0 +; MIPS32-NEXT: subu $2, $4, $2 +; MIPS32-NEXT: sw $2, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sltu $1, $1, $2 +; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: bnez $1, $BB0_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_1: # %entry +; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lui $1, %hi($JTI0_0) -; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sll 
$3, $2, 2 -; MIPS32-NEXT: addu $1, $1, $3 +; MIPS32-NEXT: sll $2, $2, 2 +; MIPS32-NEXT: addu $1, $1, $2 ; MIPS32-NEXT: lw $1, %lo($JTI0_0)($1) ; MIPS32-NEXT: jr $1 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_2: # %sw.bb -; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -45,37 +45,37 @@ ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_4: # %sw.bb2 -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_5: # %sw.bb3 -; MIPS32-NEXT: lw $2, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_6: # %sw.default ; MIPS32-NEXT: .insn ; MIPS32-NEXT: # %bb.7: # %sw.epilog -; MIPS32-NEXT: ori $1, $zero, 8 -; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: subu $1, $2, $1 -; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sltu $4, $3, $1 -; MIPS32-NEXT: andi $4, $4, 1 -; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $4, $BB0_13 +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ori $3, $zero, 8 +; MIPS32-NEXT: subu $2, $2, $3 +; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sltu $1, $1, $2 +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_13 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_8: # %sw.epilog -; MIPS32-NEXT: lui $1, %hi($JTI0_1) ; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sll $3, $2, 2 -; MIPS32-NEXT: addu $1, $1, $3 +; MIPS32-NEXT: lui $1, %hi($JTI0_1) +; MIPS32-NEXT: sll $2, $2, 2 +; MIPS32-NEXT: addu $1, $1, $2 ; MIPS32-NEXT: lw $1, %lo($JTI0_1)($1) ; MIPS32-NEXT: jr $1 ; 
MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_9: # %sw.bb4 -; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -85,35 +85,20 @@ ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_11: # %sw.bb6 -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_12: # %sw.bb7 -; MIPS32-NEXT: lw $2, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_13: # %sw.default8 -; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32: $JTI0_0: -; MIPS32-NEXT: .4byte ($BB0_2) -; MIPS32-NEXT: .4byte ($BB0_3) -; MIPS32-NEXT: .4byte ($BB0_4) -; MIPS32-NEXT: .4byte ($BB0_5) -; MIPS32-NEXT: .4byte ($BB0_2) -; MIPS32-NEXT: .4byte ($BB0_3) -; MIPS32-NEXT: .4byte ($BB0_4) -; MIPS32-NEXT: .4byte ($BB0_5) -; MIPS32-NEXT: $JTI0_1: -; MIPS32-NEXT: .4byte ($BB0_9) -; MIPS32-NEXT: .4byte ($BB0_10) -; MIPS32-NEXT: .4byte ($BB0_11) -; MIPS32-NEXT: .4byte ($BB0_12) - ; ; MIPS32_PIC-LABEL: mod4_0_to_11: ; MIPS32_PIC: # %bb.0: # %entry @@ -122,117 +107,104 @@ ; MIPS32_PIC-NEXT: addiu $sp, $sp, -40 ; MIPS32_PIC-NEXT: .cfi_def_cfa_offset 40 ; MIPS32_PIC-NEXT: addu $1, $2, $25 -; MIPS32_PIC-NEXT: ori $2, $zero, 7 -; MIPS32_PIC-NEXT: ori $3, $zero, 3 -; MIPS32_PIC-NEXT: ori $5, $zero, 2 -; MIPS32_PIC-NEXT: ori $6, $zero, 1 -; MIPS32_PIC-NEXT: ori $7, $zero, 0 -; MIPS32_PIC-NEXT: addiu $8, $zero, 65535 -; MIPS32_PIC-NEXT: ori $9, $zero, 0 -; MIPS32_PIC-NEXT: subu $9, $4, $9 -; MIPS32_PIC-NEXT: sltu $2, $2, $9 -; MIPS32_PIC-NEXT: andi $2, $2, 1 -; MIPS32_PIC-NEXT: sw $1, 36($sp) # 
4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $4, 32($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $3, 28($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $5, 24($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $6, 20($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $7, 16($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $8, 12($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: sw $9, 8($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: bnez $2, $BB0_6 +; MIPS32_PIC-NEXT: sw $1, 8($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: sw $4, 12($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: ori $1, $zero, 7 +; MIPS32_PIC-NEXT: ori $2, $zero, 3 +; MIPS32_PIC-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: ori $2, $zero, 2 +; MIPS32_PIC-NEXT: sw $2, 20($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: ori $2, $zero, 1 +; MIPS32_PIC-NEXT: sw $2, 24($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: ori $2, $zero, 0 +; MIPS32_PIC-NEXT: sw $2, 28($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: addiu $2, $zero, 65535 +; MIPS32_PIC-NEXT: sw $2, 32($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: ori $2, $zero, 0 +; MIPS32_PIC-NEXT: subu $2, $4, $2 +; MIPS32_PIC-NEXT: sw $2, 36($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: sltu $1, $1, $2 +; MIPS32_PIC-NEXT: andi $1, $1, 1 +; MIPS32_PIC-NEXT: bnez $1, $BB0_6 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_1: # %entry -; MIPS32_PIC-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: lw $2, %got($JTI0_0)($1) -; MIPS32_PIC-NEXT: lw $3, 8($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: sll $4, $3, 2 -; MIPS32_PIC-NEXT: addu $2, $2, $4 -; MIPS32_PIC-NEXT: lw $2, %lo($JTI0_0)($2) -; MIPS32_PIC-NEXT: addu $2, $2, $1 -; MIPS32_PIC-NEXT: jr $2 +; MIPS32_PIC-NEXT: lw $2, 8($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $3, 36($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $1, %got($JTI0_0)($2) +; MIPS32_PIC-NEXT: sll $3, $3, 2 +; MIPS32_PIC-NEXT: addu $1, $1, $3 +; MIPS32_PIC-NEXT: lw $1, %lo($JTI0_0)($1) +; 
MIPS32_PIC-NEXT: addu $1, $1, $2 +; MIPS32_PIC-NEXT: jr $1 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_2: # %sw.bb -; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_3: # %sw.bb1 -; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_4: # %sw.bb2 -; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_5: # %sw.bb3 -; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_6: # %sw.default ; MIPS32_PIC-NEXT: .insn ; MIPS32_PIC-NEXT: # %bb.7: # %sw.epilog -; MIPS32_PIC-NEXT: ori $1, $zero, 8 -; MIPS32_PIC-NEXT: lw $2, 32($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: subu $1, $2, $1 -; MIPS32_PIC-NEXT: lw $3, 28($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: sltu $4, $3, $1 -; MIPS32_PIC-NEXT: andi $4, $4, 1 -; MIPS32_PIC-NEXT: sw $1, 4($sp) # 4-byte Folded Spill -; MIPS32_PIC-NEXT: bnez $4, $BB0_13 +; MIPS32_PIC-NEXT: lw $1, 16($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: ori $3, $zero, 8 +; MIPS32_PIC-NEXT: subu $2, $2, $3 +; MIPS32_PIC-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32_PIC-NEXT: sltu $1, $1, $2 +; MIPS32_PIC-NEXT: andi $1, $1, 1 +; MIPS32_PIC-NEXT: bnez $1, $BB0_13 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_8: # %sw.epilog -; MIPS32_PIC-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: lw 
$2, %got($JTI0_1)($1) +; MIPS32_PIC-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: lw $3, 4($sp) # 4-byte Folded Reload -; MIPS32_PIC-NEXT: sll $4, $3, 2 -; MIPS32_PIC-NEXT: addu $2, $2, $4 -; MIPS32_PIC-NEXT: lw $2, %lo($JTI0_1)($2) -; MIPS32_PIC-NEXT: addu $2, $2, $1 -; MIPS32_PIC-NEXT: jr $2 +; MIPS32_PIC-NEXT: lw $1, %got($JTI0_1)($2) +; MIPS32_PIC-NEXT: sll $3, $3, 2 +; MIPS32_PIC-NEXT: addu $1, $1, $3 +; MIPS32_PIC-NEXT: lw $1, %lo($JTI0_1)($1) +; MIPS32_PIC-NEXT: addu $1, $1, $2 +; MIPS32_PIC-NEXT: jr $1 ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_9: # %sw.bb4 -; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_10: # %sw.bb5 -; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_11: # %sw.bb6 -; MIPS32_PIC-NEXT: lw $2, 24($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_12: # %sw.bb7 -; MIPS32_PIC-NEXT: lw $2, 28($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 16($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop ; MIPS32_PIC-NEXT: $BB0_13: # %sw.default8 -; MIPS32_PIC-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS32_PIC-NEXT: lw $2, 32($sp) # 4-byte Folded Reload ; MIPS32_PIC-NEXT: addiu $sp, $sp, 40 ; MIPS32_PIC-NEXT: jr $ra ; MIPS32_PIC-NEXT: nop -; MIPS32_PIC: $JTI0_0: -; MIPS32_PIC-NEXT: .gpword ($BB0_2) -; MIPS32_PIC-NEXT: .gpword ($BB0_3) -; MIPS32_PIC-NEXT: .gpword ($BB0_4) -; MIPS32_PIC-NEXT: .gpword ($BB0_5) -; MIPS32_PIC-NEXT: .gpword ($BB0_2) -; MIPS32_PIC-NEXT: 
.gpword ($BB0_3) -; MIPS32_PIC-NEXT: .gpword ($BB0_4) -; MIPS32_PIC-NEXT: .gpword ($BB0_5) -; MIPS32_PIC-NEXT: $JTI0_1: -; MIPS32_PIC-NEXT: .gpword ($BB0_9) -; MIPS32_PIC-NEXT: .gpword ($BB0_10) -; MIPS32_PIC-NEXT: .gpword ($BB0_11) -; MIPS32_PIC-NEXT: .gpword ($BB0_12) + entry: switch i32 %a, label %sw.default [ Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_4_unaligned.ll @@ -15,11 +15,11 @@ ; MIPS32-LABEL: load_float_align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(float_align1) -; MIPS32-NEXT: addiu $1, $1, %lo(float_align1) -; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 3($1) -; MIPS32-NEXT: lwr $2, 0($1) -; MIPS32-NEXT: mtc1 $2, $f0 +; MIPS32-NEXT: addiu $2, $1, %lo(float_align1) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 3($2) +; MIPS32-NEXT: lwr $1, 0($2) +; MIPS32-NEXT: mtc1 $1, $f0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -38,11 +38,11 @@ ; MIPS32-LABEL: load_float_align2: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(float_align2) -; MIPS32-NEXT: addiu $1, $1, %lo(float_align2) -; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 3($1) -; MIPS32-NEXT: lwr $2, 0($1) -; MIPS32-NEXT: mtc1 $2, $f0 +; MIPS32-NEXT: addiu $2, $1, %lo(float_align2) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 3($2) +; MIPS32-NEXT: lwr $1, 0($2) +; MIPS32-NEXT: mtc1 $1, $f0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/load_split_because_of_memsize_or_align.ll @@ -131,25 +131,23 @@ define i64 
@load5align1(%struct.MemSize5_Align1* %S) { ; MIPS32-LABEL: load5align1: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) -; MIPS32-NEXT: lbu $2, 4($4) +; MIPS32-NEXT: # implicit-def: $v0 +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: lbu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 255 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 255 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load5align1: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lbu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lbu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 255 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 255 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize5_Align1* %S to i40* @@ -161,25 +159,23 @@ define i64 @load5align2(%struct.MemSize5_Align2* %S) { ; MIPS32-LABEL: load5align2: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) -; MIPS32-NEXT: lbu $2, 4($4) +; MIPS32-NEXT: # implicit-def: $v0 +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: lbu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 255 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 255 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load5align2: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lbu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lbu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 255 -; MIPS32R6-NEXT: 
move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 255 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize5_Align2* %S to i40* @@ -191,23 +187,21 @@ define i64 @load5align4(%struct.MemSize5_Align4* %S) { ; MIPS32-LABEL: load5align4: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lbu $2, 4($4) +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lbu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 255 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 255 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load5align4: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lbu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lbu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 255 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 255 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize5_Align4* %S to i40* @@ -219,23 +213,21 @@ define i64 @load5align8(%struct.MemSize5_Align8* %S) { ; MIPS32-LABEL: load5align8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lbu $2, 4($4) +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lbu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 255 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 255 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load5align8: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lbu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lbu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 255 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, 
$1, 255 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize5_Align8* %S to i40* @@ -247,27 +239,25 @@ define i64 @load6align1(%struct.MemSize6_Align1* %S) { ; MIPS32-LABEL: load6align1: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) ; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 7($4) -; MIPS32-NEXT: lwr $2, 4($4) +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 7($4) +; MIPS32-NEXT: lwr $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 65535 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 65535 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load6align1: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lhu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lhu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 65535 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 65535 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize6_Align1* %S to i48* @@ -279,25 +269,23 @@ define i64 @load6align2(%struct.MemSize6_Align2* %S) { ; MIPS32-LABEL: load6align2: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) -; MIPS32-NEXT: lhu $2, 4($4) +; MIPS32-NEXT: # implicit-def: $v0 +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: lhu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 65535 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 65535 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load6align2: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw 
$1, 0($4) -; MIPS32R6-NEXT: lhu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lhu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 65535 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 65535 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize6_Align2* %S to i48* @@ -309,23 +297,21 @@ define i64 @load6align4(%struct.MemSize6_Align4* %S) { ; MIPS32-LABEL: load6align4: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lhu $2, 4($4) +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lhu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 65535 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 65535 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load6align4: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lhu $2, 4($4) +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lhu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 65535 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 65535 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize6_Align4* %S to i48* @@ -337,23 +323,21 @@ define i64 @load6align8(%struct.MemSize6_Align8* %S) { ; MIPS32-LABEL: load6align8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lhu $2, 4($4) +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lhu $1, 4($4) ; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: andi $3, $2, 65535 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: and $2, $2, $3 +; MIPS32-NEXT: andi $3, $1, 65535 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load6align8: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lhu $2, 4($4) +; 
MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lhu $1, 4($4) ; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: andi $3, $2, 65535 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: and $2, $2, $3 +; MIPS32R6-NEXT: andi $3, $1, 65535 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize6_Align8* %S to i48* @@ -365,31 +349,29 @@ define i64 @load7align1(%struct.MemSize7_Align1* %S) { ; MIPS32-LABEL: load7align1: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) ; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 7($4) -; MIPS32-NEXT: lwr $2, 4($4) -; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: lui $4, 255 -; MIPS32-NEXT: ori $4, $4, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: and $3, $2, $4 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 7($4) +; MIPS32-NEXT: lwr $1, 4($4) +; MIPS32-NEXT: addiu $4, $zero, 65535 +; MIPS32-NEXT: lui $3, 255 +; MIPS32-NEXT: ori $3, $3, 65535 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: and $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load7align1: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lw $2, 4($4) -; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: lui $4, 255 -; MIPS32R6-NEXT: ori $4, $4, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: and $3, $2, $4 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lw $1, 4($4) +; MIPS32R6-NEXT: addiu $4, $zero, 65535 +; MIPS32R6-NEXT: lui $3, 255 +; MIPS32R6-NEXT: ori $3, $3, 65535 +; MIPS32R6-NEXT: and $2, $2, $4 +; MIPS32R6-NEXT: and $3, $1, $3 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align1* %S to i56* @@ -401,31 +383,29 @@ define i64 @load7align2(%struct.MemSize7_Align2* %S) { ; MIPS32-LABEL: load7align2: ; MIPS32: # %bb.0: # 
%entry -; MIPS32-NEXT: # implicit-def: $at -; MIPS32-NEXT: lwl $1, 3($4) -; MIPS32-NEXT: lwr $1, 0($4) ; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 7($4) -; MIPS32-NEXT: lwr $2, 4($4) -; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: lui $4, 255 -; MIPS32-NEXT: ori $4, $4, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: and $3, $2, $4 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lwl $2, 3($4) +; MIPS32-NEXT: lwr $2, 0($4) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 7($4) +; MIPS32-NEXT: lwr $1, 4($4) +; MIPS32-NEXT: addiu $4, $zero, 65535 +; MIPS32-NEXT: lui $3, 255 +; MIPS32-NEXT: ori $3, $3, 65535 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: and $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load7align2: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lw $2, 4($4) -; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: lui $4, 255 -; MIPS32R6-NEXT: ori $4, $4, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: and $3, $2, $4 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lw $1, 4($4) +; MIPS32R6-NEXT: addiu $4, $zero, 65535 +; MIPS32R6-NEXT: lui $3, 255 +; MIPS32R6-NEXT: ori $3, $3, 65535 +; MIPS32R6-NEXT: and $2, $2, $4 +; MIPS32R6-NEXT: and $3, $1, $3 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align2* %S to i56* @@ -437,27 +417,25 @@ define i64 @load7align4(%struct.MemSize7_Align4* %S) { ; MIPS32-LABEL: load7align4: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lw $2, 4($4) -; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: lui $4, 255 -; MIPS32-NEXT: ori $4, $4, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: and $3, $2, $4 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lw $1, 4($4) +; MIPS32-NEXT: addiu $4, $zero, 65535 +; MIPS32-NEXT: lui $3, 255 +; MIPS32-NEXT: ori $3, $3, 65535 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: and $3, $1, $3 ; 
MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load7align4: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lw $2, 4($4) -; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: lui $4, 255 -; MIPS32R6-NEXT: ori $4, $4, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: and $3, $2, $4 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lw $1, 4($4) +; MIPS32R6-NEXT: addiu $4, $zero, 65535 +; MIPS32R6-NEXT: lui $3, 255 +; MIPS32R6-NEXT: ori $3, $3, 65535 +; MIPS32R6-NEXT: and $2, $2, $4 +; MIPS32R6-NEXT: and $3, $1, $3 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align4* %S to i56* @@ -469,27 +447,25 @@ define i64 @load7align8(%struct.MemSize7_Align8* %S) { ; MIPS32-LABEL: load7align8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lw $2, 4($4) -; MIPS32-NEXT: addiu $3, $zero, 65535 -; MIPS32-NEXT: lui $4, 255 -; MIPS32-NEXT: ori $4, $4, 65535 -; MIPS32-NEXT: and $1, $1, $3 -; MIPS32-NEXT: and $3, $2, $4 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lw $1, 4($4) +; MIPS32-NEXT: addiu $4, $zero, 65535 +; MIPS32-NEXT: lui $3, 255 +; MIPS32-NEXT: ori $3, $3, 65535 +; MIPS32-NEXT: and $2, $2, $4 +; MIPS32-NEXT: and $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: load7align8: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $1, 0($4) -; MIPS32R6-NEXT: lw $2, 4($4) -; MIPS32R6-NEXT: addiu $3, $zero, 65535 -; MIPS32R6-NEXT: lui $4, 255 -; MIPS32R6-NEXT: ori $4, $4, 65535 -; MIPS32R6-NEXT: and $1, $1, $3 -; MIPS32R6-NEXT: and $3, $2, $4 -; MIPS32R6-NEXT: move $2, $1 +; MIPS32R6-NEXT: lw $2, 0($4) +; MIPS32R6-NEXT: lw $1, 4($4) +; MIPS32R6-NEXT: addiu $4, $zero, 65535 +; MIPS32R6-NEXT: lui $3, 255 +; MIPS32R6-NEXT: ori $3, $3, 65535 +; MIPS32R6-NEXT: and $2, $2, $4 +; MIPS32R6-NEXT: and $3, $1, $3 ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align8* %S to i56* @@ -502,15 +478,15 @@ ; 
MIPS32-LABEL: load_double_align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align1) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align1) +; MIPS32-NEXT: addiu $3, $1, %lo(double_align1) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 3($3) +; MIPS32-NEXT: lwr $1, 0($3) ; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 3($1) -; MIPS32-NEXT: lwr $2, 0($1) -; MIPS32-NEXT: # implicit-def: $v1 -; MIPS32-NEXT: lwl $3, 7($1) -; MIPS32-NEXT: lwr $3, 4($1) -; MIPS32-NEXT: mtc1 $2, $f0 -; MIPS32-NEXT: mtc1 $3, $f1 +; MIPS32-NEXT: lwl $2, 7($3) +; MIPS32-NEXT: lwr $2, 4($3) +; MIPS32-NEXT: mtc1 $1, $f0 +; MIPS32-NEXT: mtc1 $2, $f1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -529,15 +505,15 @@ ; MIPS32-LABEL: load_double_align2: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align2) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align2) +; MIPS32-NEXT: addiu $3, $1, %lo(double_align2) +; MIPS32-NEXT: # implicit-def: $at +; MIPS32-NEXT: lwl $1, 3($3) +; MIPS32-NEXT: lwr $1, 0($3) ; MIPS32-NEXT: # implicit-def: $v0 -; MIPS32-NEXT: lwl $2, 3($1) -; MIPS32-NEXT: lwr $2, 0($1) -; MIPS32-NEXT: # implicit-def: $v1 -; MIPS32-NEXT: lwl $3, 7($1) -; MIPS32-NEXT: lwr $3, 4($1) -; MIPS32-NEXT: mtc1 $2, $f0 -; MIPS32-NEXT: mtc1 $3, $f1 +; MIPS32-NEXT: lwl $2, 7($3) +; MIPS32-NEXT: lwr $2, 4($3) +; MIPS32-NEXT: mtc1 $1, $f0 +; MIPS32-NEXT: mtc1 $2, $f1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -556,11 +532,11 @@ ; MIPS32-LABEL: load_double_align4: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align4) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align4) -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $1, 4($1) -; MIPS32-NEXT: mtc1 $2, $f0 -; MIPS32-NEXT: mtc1 $1, $f1 +; MIPS32-NEXT: addiu $2, $1, %lo(double_align4) +; MIPS32-NEXT: lw $1, 0($2) +; MIPS32-NEXT: lw $2, 4($2) +; MIPS32-NEXT: mtc1 $1, $f0 +; MIPS32-NEXT: mtc1 $2, $f1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; Index: 
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll @@ -6,126 +6,124 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -48 ; MIPS32-NEXT: .cfi_def_cfa_offset 48 +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 32($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 64 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 68 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 72 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: sw $1, 36($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 68 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 72 +; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 36($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB0_12 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB0_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB0_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_7 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB0_4 ; MIPS32-NEXT: nop 
; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_8 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB0_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0 -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB0_9: # %b.PHI.1 -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB0_11 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB0_11 
; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB0_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_14 +; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB0_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_14: # %b.PHI.2.0 -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB0_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB0_16: # %b.PHI.2 -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB0_19 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; 
MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB0_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB0_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_19: # %b.PHI.3 -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $4, $3, 1 -; MIPS32-NEXT: movn $1, $2, $4 -; MIPS32-NEXT: lw $4, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $5, $4, 1 -; MIPS32-NEXT: move $6, $2 -; MIPS32-NEXT: movn $6, $1, $5 -; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $6, 0($1) -; MIPS32-NEXT: sw $2, 0($1) +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $5, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: movn $4, $1, $5 +; MIPS32-NEXT: andi $5, $3, 1 +; MIPS32-NEXT: move $3, $1 +; MIPS32-NEXT: movn $3, $4, $5 +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -193,132 +191,130 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -56 ; MIPS32-NEXT: .cfi_def_cfa_offset 56 +; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 72 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 76 -; 
MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 80 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: andi $9, $4, 1 +; MIPS32-NEXT: sw $1, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 76 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 80 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 48($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 0 ; MIPS32-NEXT: sw $1, 52($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $8, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $9, $BB1_12 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB1_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB1_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_7 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB1_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_8 +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB1_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill 
+; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB1_9: # %b.PHI.1 -; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: lw $5, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB1_11 +; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $3, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB1_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB1_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; 
MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_14 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB1_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB1_16: # %b.PHI.2 -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: move $5, $1 -; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB1_19 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: move $3, $2 +; MIPS32-NEXT: sw $3, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB1_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB1_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 
4-byte Folded Reload -; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_19: # %b.PHI.3 -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $3, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $4, $3, 1 -; MIPS32-NEXT: movn $1, $2, $4 -; MIPS32-NEXT: lw $4, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $5, $4, 1 -; MIPS32-NEXT: move $6, $2 -; MIPS32-NEXT: movn $6, $1, $5 -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $6, 0($1) -; MIPS32-NEXT: sw $2, 0($1) +; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $5, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: movn $4, $1, $5 +; MIPS32-NEXT: andi $5, $3, 1 +; MIPS32-NEXT: move $3, $1 +; MIPS32-NEXT: movn $3, $4, $5 +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -385,126 +381,124 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -48 ; MIPS32-NEXT: .cfi_def_cfa_offset 48 +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 32($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 64 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 68 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 72 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: sw $1, 36($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 68 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 
40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 72 +; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 36($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB2_12 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB2_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB2_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_7 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB2_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_8 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB2_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0 -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 
0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB2_9: # %b.PHI.1 -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB2_11 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB2_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB2_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_14 +; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB2_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0 -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 
4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB2_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB2_16: # %b.PHI.2 -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: move $4, $1 -; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB2_19 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB2_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB2_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_19: # %b.PHI.3 -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $4, $3, 1 -; MIPS32-NEXT: movn $1, $2, $4 -; MIPS32-NEXT: lw $4, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $5, $4, 1 -; MIPS32-NEXT: move $6, $2 -; MIPS32-NEXT: movn $6, $1, $5 -; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload -; 
MIPS32-NEXT: sw $6, 0($1) -; MIPS32-NEXT: sw $2, 0($1) +; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $5, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: movn $4, $1, $5 +; MIPS32-NEXT: andi $5, $3, 1 +; MIPS32-NEXT: move $3, $1 +; MIPS32-NEXT: movn $3, $4, $5 +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -572,40 +566,40 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -56 ; MIPS32-NEXT: .cfi_def_cfa_offset 56 +; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 32($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 72 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 76 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 80 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: mtc1 $8, $f0 -; MIPS32-NEXT: andi $8, $4, 1 -; MIPS32-NEXT: sw $1, 52($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 36($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: swc1 $f0, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB3_12 +; MIPS32-NEXT: sw $1, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 76 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 80 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 48($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: mtc1 
$1, $f0 +; MIPS32-NEXT: swc1 $f0, 52($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB3_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB3_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_7 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB3_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_8 +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB3_6 @@ -617,40 +611,39 @@ ; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB3_9: # %b.PHI.1 -; MIPS32-NEXT: lwc1 $f0, 20($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.s $f1, $f0 -; MIPS32-NEXT: lwc1 $f2, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: swc1 $f0, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lwc1 $f0, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lwc1 $f1, 20($sp) # 4-byte Folded 
Reload +; MIPS32-NEXT: swc1 $f1, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_11 +; MIPS32-NEXT: swc1 $f0, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB3_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB3_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end -; MIPS32-NEXT: lwc1 $f0, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lwc1 $f0, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: swc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_14 +; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB3_15 @@ -662,43 +655,42 @@ ; MIPS32-NEXT: j $BB3_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB3_16: # %b.PHI.2 +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.s $f1, $f0 -; MIPS32-NEXT: mov.s $f2, $f0 ; MIPS32-NEXT: swc1 $f0, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: mov.s $f1, $f0 ; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_19 +; 
MIPS32-NEXT: swc1 $f0, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB3_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB3_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end ; MIPS32-NEXT: lwc1 $f0, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: swc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_19: # %b.PHI.3 -; MIPS32-NEXT: lwc1 $f0, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lwc1 $f1, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: movn.s $f0, $f1, $2 -; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: mov.s $f2, $f1 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lwc1 $f0, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lwc1 $f2, 16($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $3, $3, 1 ; MIPS32-NEXT: movn.s $f2, $f0, $3 -; MIPS32-NEXT: lw $3, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: swc1 $f2, 0($3) -; MIPS32-NEXT: swc1 $f1, 0($3) +; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: mov.s $f1, $f0 +; MIPS32-NEXT: movn.s $f1, $f2, $2 +; MIPS32-NEXT: swc1 $f1, 0($1) +; MIPS32-NEXT: swc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll @@ -6,126 +6,124 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -72 ; MIPS32-NEXT: .cfi_def_cfa_offset 72 +; 
MIPS32-NEXT: sw $4, 44($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 48($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 52($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 56($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 88 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 92 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 96 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: sw $1, 60($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 92 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 64($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 96 +; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 64($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 60($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 56($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB0_12 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB0_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB0_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_7 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB0_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_8 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB0_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB0_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0 
-; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB0_9: # %b.PHI.1 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 +; MIPS32-NEXT: sdc1 $f0, 16($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB0_11 +; MIPS32-NEXT: bnez $1, $BB0_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB0_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end -; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_14 +; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, 
$BB0_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB0_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_14: # %b.PHI.2.0 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB0_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB0_16: # %b.PHI.2 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB0_19 +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB0_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB0_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_19: # %b.PHI.3 -; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload -; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: movn.d $f0, $f2, $2 -; MIPS32-NEXT: lw $2, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: mov.d $f4, $f2 +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: 
lw $2, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload +; MIPS32-NEXT: mov.d $f4, $f0 +; MIPS32-NEXT: andi $3, $3, 1 ; MIPS32-NEXT: movn.d $f4, $f0, $3 -; MIPS32-NEXT: lw $3, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sdc1 $f4, 0($3) -; MIPS32-NEXT: sdc1 $f2, 0($3) +; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: mov.d $f2, $f0 +; MIPS32-NEXT: movn.d $f2, $f4, $2 +; MIPS32-NEXT: sdc1 $f2, 0($1) +; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -193,39 +191,39 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -80 ; MIPS32-NEXT: .cfi_def_cfa_offset 80 +; MIPS32-NEXT: sw $4, 48($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 52($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 56($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 60($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 96 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 100 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 104 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: andi $9, $4, 1 +; MIPS32-NEXT: sw $1, 64($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 100 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 104 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 72($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $1, $zero, 0 ; MIPS32-NEXT: sw $1, 76($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 72($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 68($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 64($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 60($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 56($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 52($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $8, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $9, $BB1_12 +; MIPS32-NEXT: andi $1, $4, 1 +; 
MIPS32-NEXT: bnez $1, $BB1_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB1_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_7 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB1_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_8 +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB1_6 @@ -233,58 +231,56 @@ ; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $3, 4($1) -; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 4($1) +; MIPS32-NEXT: sw $2, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $3, 4($1) -; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 4($1) +; MIPS32-NEXT: sw $2, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $3, 4($1) -; 
MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 4($1) +; MIPS32-NEXT: sw $2, 40($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB1_9: # %b.PHI.1 -; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $3, 64($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $4, $3, 1 -; MIPS32-NEXT: move $5, $2 -; MIPS32-NEXT: move $6, $1 -; MIPS32-NEXT: lw $7, 48($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $8, 48($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 36($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $4, $BB1_11 +; MIPS32-NEXT: lw $2, 76($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $4, 40($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $3, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $3, $2 +; MIPS32-NEXT: sw $3, 32($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $2, 36($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB1_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB1_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end -; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 0($2) -; MIPS32-NEXT: lw $3, 36($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $3, 4($2) +; MIPS32-NEXT: lw $1, 16($sp) # 
4-byte Folded Reload +; MIPS32-NEXT: lw $2, 72($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 4($2) ; MIPS32-NEXT: addiu $sp, $sp, 80 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_14 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB1_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB1_15 @@ -292,66 +288,64 @@ ; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $3, 4($1) -; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 4($1) +; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) -; MIPS32-NEXT: lw $3, 4($1) -; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 4($1) +; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB1_16: # %b.PHI.2 -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $3, 68($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $4, $3, 1 -; MIPS32-NEXT: move $5, $2 -; MIPS32-NEXT: move $6, $1 -; MIPS32-NEXT: move $7, $2 -; MIPS32-NEXT: move $8, $1 -; MIPS32-NEXT: sw $1, 4($sp) # 4-byte 
Folded Spill ; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 28($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $4, $BB1_19 +; MIPS32-NEXT: sw $3, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: move $4, $3 +; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: move $4, $2 +; MIPS32-NEXT: sw $4, 28($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $3, 32($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $2, 36($sp) # 4-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB1_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB1_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 0($2) +; MIPS32-NEXT: lw $2, 72($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $3, 4($2) +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 4($2) ; MIPS32-NEXT: addiu $sp, $sp, 80 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_19: # %b.PHI.3 -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 72($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $5, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $7, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $4, 28($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $5, 64($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $6, $5, 1 -; MIPS32-NEXT: movn $2, $4, $6 -; MIPS32-NEXT: movn $1, $3, $6 -; MIPS32-NEXT: lw $6, 68($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $7, $6, 1 -; MIPS32-NEXT: move $8, $4 -; MIPS32-NEXT: movn $8, $2, $7 -; MIPS32-NEXT: move $2, $3 -; MIPS32-NEXT: movn $2, $1, $7 
-; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $8, 0($1) -; MIPS32-NEXT: sw $2, 4($1) -; MIPS32-NEXT: sw $4, 0($1) -; MIPS32-NEXT: sw $3, 4($1) +; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $4, 32($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $6, 36($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $7, $7, 1 +; MIPS32-NEXT: movn $4, $3, $7 +; MIPS32-NEXT: movn $6, $1, $7 +; MIPS32-NEXT: andi $7, $5, 1 +; MIPS32-NEXT: move $5, $3 +; MIPS32-NEXT: movn $5, $4, $7 +; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: movn $4, $6, $7 +; MIPS32-NEXT: sw $5, 0($2) +; MIPS32-NEXT: sw $4, 4($2) +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 4($2) ; MIPS32-NEXT: addiu $sp, $sp, 80 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -418,126 +412,124 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -72 ; MIPS32-NEXT: .cfi_def_cfa_offset 72 +; MIPS32-NEXT: sw $4, 44($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 48($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 52($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 56($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 88 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 92 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 96 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: sw $1, 60($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 92 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 64($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 96 +; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 64($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 60($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 56($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB2_12 +; MIPS32-NEXT: andi $1, $4, 1 +; 
MIPS32-NEXT: bnez $1, $BB2_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB2_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_7 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB2_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_8 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j $BB2_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB2_9: # %b.PHI.1 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 +; MIPS32-NEXT: sdc1 $f0, 16($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 ; 
MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB2_11 +; MIPS32-NEXT: bnez $1, $BB2_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB2_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end -; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_14 +; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB2_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB2_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0 -; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB2_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB2_16: # %b.PHI.2 +; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB2_19 +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte 
Folded Spill +; MIPS32-NEXT: bnez $1, $BB2_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB2_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_19: # %b.PHI.3 -; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload -; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: movn.d $f0, $f2, $2 -; MIPS32-NEXT: lw $2, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: mov.d $f4, $f2 +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 48($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload +; MIPS32-NEXT: mov.d $f4, $f0 +; MIPS32-NEXT: andi $3, $3, 1 ; MIPS32-NEXT: movn.d $f4, $f0, $3 -; MIPS32-NEXT: lw $3, 44($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sdc1 $f4, 0($3) -; MIPS32-NEXT: sdc1 $f2, 0($3) +; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: mov.d $f2, $f0 +; MIPS32-NEXT: movn.d $f2, $f4, $2 +; MIPS32-NEXT: sdc1 $f2, 0($1) +; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -605,135 +597,133 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -88 ; MIPS32-NEXT: .cfi_def_cfa_offset 88 +; MIPS32-NEXT: sw $4, 52($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 56($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 60($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 64($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 104 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 108 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, 
$sp, 112 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: ori $8, $zero, 0 -; MIPS32-NEXT: ori $9, $zero, 0 -; MIPS32-NEXT: mtc1 $9, $f0 -; MIPS32-NEXT: mtc1 $8, $f1 -; MIPS32-NEXT: andi $8, $4, 1 -; MIPS32-NEXT: sw $1, 84($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $4, 80($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $5, 76($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 72($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 68($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 64($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $3, 60($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sdc1 $f0, 48($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB3_12 +; MIPS32-NEXT: sw $1, 68($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 108 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 72($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 112 +; MIPS32-NEXT: lw $1, 0($1) +; MIPS32-NEXT: sw $1, 76($sp) # 4-byte Folded Spill +; MIPS32-NEXT: ori $2, $zero, 0 +; MIPS32-NEXT: ori $1, $zero, 0 +; MIPS32-NEXT: mtc1 $1, $f0 +; MIPS32-NEXT: mtc1 $2, $f1 +; MIPS32-NEXT: sdc1 $f0, 80($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB3_12 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB3_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_2: # %pre.PHI.1 -; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_7 +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_7 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 ; MIPS32-NEXT: j $BB3_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0 -; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_8 +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_8 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 ; MIPS32-NEXT: j 
$BB3_6 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_6: # %b.PHI.1.0 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1 -; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2 -; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB3_9: # %b.PHI.1 -; MIPS32-NEXT: ldc1 $f0, 40($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 -; MIPS32-NEXT: ldc1 $f4, 48($sp) # 8-byte Folded Reload -; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill +; MIPS32-NEXT: ldc1 $f0, 80($sp) # 8-byte Folded Reload +; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f2, 40($sp) # 8-byte Folded Reload +; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 ; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_11 +; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB3_11 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.10: # %b.PHI.1 ; MIPS32-NEXT: j $BB3_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end -; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload +; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 
0($1) ; MIPS32-NEXT: addiu $sp, $sp, 88 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_12: # %pre.PHI.2 -; MIPS32-NEXT: lw $1, 80($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_14 +; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: bnez $1, $BB3_14 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 ; MIPS32-NEXT: j $BB3_15 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_14: # %b.PHI.2.0 -; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB3_16 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1 -; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB3_16: # %b.PHI.2 +; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: mov.d $f2, $f0 -; MIPS32-NEXT: mov.d $f4, $f0 ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_19 +; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill +; MIPS32-NEXT: bnez $1, $BB3_19 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.17: # %b.PHI.2 ; MIPS32-NEXT: j $BB3_18 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 88 ; MIPS32-NEXT: jr $ra ; 
MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_19: # %b.PHI.3 -; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload -; MIPS32-NEXT: ldc1 $f2, 24($sp) # 8-byte Folded Reload -; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: movn.d $f0, $f2, $2 -; MIPS32-NEXT: lw $2, 76($sp) # 4-byte Folded Reload -; MIPS32-NEXT: andi $3, $2, 1 -; MIPS32-NEXT: mov.d $f4, $f2 -; MIPS32-NEXT: movn.d $f4, $f0, $3 +; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 60($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sdc1 $f4, 0($3) -; MIPS32-NEXT: sdc1 $f2, 0($3) +; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload +; MIPS32-NEXT: ldc1 $f4, 32($sp) # 8-byte Folded Reload +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: movn.d $f4, $f0, $3 +; MIPS32-NEXT: andi $2, $2, 1 +; MIPS32-NEXT: mov.d $f2, $f0 +; MIPS32-NEXT: movn.d $f2, $f4, $2 +; MIPS32-NEXT: sdc1 $f2, 0($1) +; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 88 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul.ll @@ -86,13 +86,14 @@ define i64 @mul_i64(i64 %a, i64 %b) { ; MIPS32-LABEL: mul_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: mul $2, $6, $4 -; MIPS32-NEXT: mul $1, $7, $4 -; MIPS32-NEXT: mul $3, $6, $5 -; MIPS32-NEXT: multu $6, $4 -; MIPS32-NEXT: mfhi $4 -; MIPS32-NEXT: addu $1, $1, $3 -; MIPS32-NEXT: addu $3, $1, $4 +; MIPS32-NEXT: move $3, $4 +; MIPS32-NEXT: mul $2, $6, $3 +; MIPS32-NEXT: mul $1, $7, $3 +; MIPS32-NEXT: mul $4, $6, $5 +; MIPS32-NEXT: multu $6, $3 +; MIPS32-NEXT: mfhi $3 +; MIPS32-NEXT: addu $1, $1, $4 +; MIPS32-NEXT: addu $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -103,72 +104,73 @@ define i128 @mul_i128(i128 %a, i128 %b) { ; MIPS32-LABEL: 
mul_i128: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: move $14, $4 +; MIPS32-NEXT: move $13, $5 +; MIPS32-NEXT: move $12, $6 +; MIPS32-NEXT: move $9, $7 ; MIPS32-NEXT: addiu $1, $sp, 16 +; MIPS32-NEXT: lw $6, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 20 +; MIPS32-NEXT: lw $7, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 24 +; MIPS32-NEXT: lw $8, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 28 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 20 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 24 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: addiu $8, $sp, 28 -; MIPS32-NEXT: lw $8, 0($8) -; MIPS32-NEXT: mul $9, $1, $4 -; MIPS32-NEXT: mul $10, $2, $4 -; MIPS32-NEXT: mul $11, $1, $5 -; MIPS32-NEXT: multu $1, $4 -; MIPS32-NEXT: mfhi $12 -; MIPS32-NEXT: addu $10, $10, $11 -; MIPS32-NEXT: sltu $11, $10, $11 -; MIPS32-NEXT: andi $11, $11, 1 -; MIPS32-NEXT: addu $10, $10, $12 -; MIPS32-NEXT: sltu $12, $10, $12 -; MIPS32-NEXT: andi $12, $12, 1 -; MIPS32-NEXT: addu $11, $11, $12 -; MIPS32-NEXT: mul $12, $3, $4 -; MIPS32-NEXT: mul $13, $2, $5 -; MIPS32-NEXT: mul $14, $1, $6 -; MIPS32-NEXT: multu $2, $4 +; MIPS32-NEXT: mul $2, $6, $14 +; MIPS32-NEXT: mul $3, $7, $14 +; MIPS32-NEXT: mul $4, $6, $13 +; MIPS32-NEXT: multu $6, $14 +; MIPS32-NEXT: mfhi $5 +; MIPS32-NEXT: addu $3, $3, $4 +; MIPS32-NEXT: sltu $4, $3, $4 +; MIPS32-NEXT: andi $4, $4, 1 +; MIPS32-NEXT: addu $3, $3, $5 +; MIPS32-NEXT: sltu $5, $3, $5 +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: addu $10, $4, $5 +; MIPS32-NEXT: mul $4, $8, $14 +; MIPS32-NEXT: mul $5, $7, $13 +; MIPS32-NEXT: mul $24, $6, $12 +; MIPS32-NEXT: multu $7, $14 ; MIPS32-NEXT: mfhi $15 -; MIPS32-NEXT: multu $1, $5 -; MIPS32-NEXT: mfhi $24 -; MIPS32-NEXT: addu $12, $12, $13 -; MIPS32-NEXT: sltu $13, $12, $13 -; MIPS32-NEXT: andi $13, $13, 1 -; MIPS32-NEXT: addu $12, $12, $14 -; MIPS32-NEXT: sltu $14, $12, $14 -; MIPS32-NEXT: andi $14, $14, 1 -; MIPS32-NEXT: addu $13, $13, $14 -; MIPS32-NEXT: addu $12, $12, $15 -; MIPS32-NEXT: sltu $14, $12, $15 -; 
MIPS32-NEXT: andi $14, $14, 1 -; MIPS32-NEXT: addu $13, $13, $14 -; MIPS32-NEXT: addu $12, $12, $24 -; MIPS32-NEXT: sltu $14, $12, $24 -; MIPS32-NEXT: andi $14, $14, 1 -; MIPS32-NEXT: addu $13, $13, $14 -; MIPS32-NEXT: addu $12, $12, $11 -; MIPS32-NEXT: sltu $11, $12, $11 +; MIPS32-NEXT: multu $6, $13 +; MIPS32-NEXT: mfhi $11 +; MIPS32-NEXT: addu $4, $4, $5 +; MIPS32-NEXT: sltu $5, $4, $5 +; MIPS32-NEXT: andi $5, $5, 1 +; MIPS32-NEXT: addu $4, $4, $24 +; MIPS32-NEXT: sltu $24, $4, $24 +; MIPS32-NEXT: andi $24, $24, 1 +; MIPS32-NEXT: addu $5, $5, $24 +; MIPS32-NEXT: addu $4, $4, $15 +; MIPS32-NEXT: sltu $15, $4, $15 +; MIPS32-NEXT: andi $15, $15, 1 +; MIPS32-NEXT: addu $5, $5, $15 +; MIPS32-NEXT: addu $4, $4, $11 +; MIPS32-NEXT: sltu $11, $4, $11 ; MIPS32-NEXT: andi $11, $11, 1 -; MIPS32-NEXT: addu $11, $13, $11 -; MIPS32-NEXT: mul $8, $8, $4 -; MIPS32-NEXT: mul $13, $3, $5 -; MIPS32-NEXT: mul $14, $2, $6 -; MIPS32-NEXT: mul $7, $1, $7 -; MIPS32-NEXT: multu $3, $4 -; MIPS32-NEXT: mfhi $3 -; MIPS32-NEXT: multu $2, $5 -; MIPS32-NEXT: mfhi $2 -; MIPS32-NEXT: multu $1, $6 -; MIPS32-NEXT: mfhi $1 -; MIPS32-NEXT: addu $4, $8, $13 -; MIPS32-NEXT: addu $4, $4, $14 -; MIPS32-NEXT: addu $4, $4, $7 -; MIPS32-NEXT: addu $3, $4, $3 -; MIPS32-NEXT: addu $2, $3, $2 -; MIPS32-NEXT: addu $1, $2, $1 -; MIPS32-NEXT: addu $5, $1, $11 -; MIPS32-NEXT: move $2, $9 -; MIPS32-NEXT: move $3, $10 -; MIPS32-NEXT: move $4, $12 +; MIPS32-NEXT: addu $5, $5, $11 +; MIPS32-NEXT: addu $4, $4, $10 +; MIPS32-NEXT: sltu $10, $4, $10 +; MIPS32-NEXT: andi $10, $10, 1 +; MIPS32-NEXT: addu $5, $5, $10 +; MIPS32-NEXT: mul $1, $1, $14 +; MIPS32-NEXT: mul $11, $8, $13 +; MIPS32-NEXT: mul $10, $7, $12 +; MIPS32-NEXT: mul $9, $6, $9 +; MIPS32-NEXT: multu $8, $14 +; MIPS32-NEXT: mfhi $8 +; MIPS32-NEXT: multu $7, $13 +; MIPS32-NEXT: mfhi $7 +; MIPS32-NEXT: multu $6, $12 +; MIPS32-NEXT: mfhi $6 +; MIPS32-NEXT: addu $1, $1, $11 +; MIPS32-NEXT: addu $1, $1, $10 +; MIPS32-NEXT: addu $1, $1, $9 +; MIPS32-NEXT: addu 
$1, $1, $8 +; MIPS32-NEXT: addu $1, $1, $7 +; MIPS32-NEXT: addu $1, $1, $6 +; MIPS32-NEXT: addu $5, $1, $5 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/mul_vec.ll @@ -4,9 +4,9 @@ define void @mul_v16i8(<16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %c) { ; P5600-LABEL: mul_v16i8: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.b $w0, 0($4) -; P5600-NEXT: ld.b $w1, 0($5) -; P5600-NEXT: mulv.b $w0, $w1, $w0 +; P5600-NEXT: ld.b $w1, 0($4) +; P5600-NEXT: ld.b $w0, 0($5) +; P5600-NEXT: mulv.b $w0, $w0, $w1 ; P5600-NEXT: st.b $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -21,9 +21,9 @@ define void @mul_v8i16(<8 x i16>* %a, <8 x i16>* %b, <8 x i16>* %c) { ; P5600-LABEL: mul_v8i16: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.h $w0, 0($4) -; P5600-NEXT: ld.h $w1, 0($5) -; P5600-NEXT: mulv.h $w0, $w1, $w0 +; P5600-NEXT: ld.h $w1, 0($4) +; P5600-NEXT: ld.h $w0, 0($5) +; P5600-NEXT: mulv.h $w0, $w0, $w1 ; P5600-NEXT: st.h $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -38,9 +38,9 @@ define void @mul_v4i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c) { ; P5600-LABEL: mul_v4i32: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.w $w0, 0($4) -; P5600-NEXT: ld.w $w1, 0($5) -; P5600-NEXT: mulv.w $w0, $w1, $w0 +; P5600-NEXT: ld.w $w1, 0($4) +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: mulv.w $w0, $w0, $w1 ; P5600-NEXT: st.w $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -55,9 +55,9 @@ define void @mul_v2i64(<2 x i64>* %a, <2 x i64>* %b, <2 x i64>* %c) { ; P5600-LABEL: mul_v2i64: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.d $w0, 0($4) -; P5600-NEXT: ld.d $w1, 0($5) -; P5600-NEXT: mulv.d $w0, $w1, $w0 +; P5600-NEXT: ld.d $w1, 0($4) +; P5600-NEXT: ld.d $w0, 0($5) +; P5600-NEXT: mulv.d $w0, $w0, $w1 ; P5600-NEXT: st.d $w0, 0($6) ; P5600-NEXT: jr 
$ra ; P5600-NEXT: nop Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/phi.ll @@ -6,25 +6,24 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB0_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB0_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB0_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB0_3: # %cond.false -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB0_4: # %cond.end -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 16 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -47,25 +46,24 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB1_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB1_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; 
MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB1_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB1_3: # %cond.false -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB1_4: # %cond.end -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 16 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -88,25 +86,24 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB2_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB2_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB2_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB2_3: # %cond.false -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB2_4: # %cond.end -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 16 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -129,25 +126,24 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, 
$4, 1 -; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB3_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB3_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB3_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB3_3: # %cond.false -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB3_4: # %cond.end -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 16 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -170,36 +166,35 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -24 ; MIPS32-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 40 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 44 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: andi $3, $4, 1 +; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill +; MIPS32-NEXT: addiu $1, $sp, 44 +; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: sw $1, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB4_2 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB4_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB4_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB4_2: # %cond.true -; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 4($sp) # 4-byte 
Folded Spill +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB4_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB4_3: # %cond.false ; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload -; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $2, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB4_4: # %cond.end -; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: move $3, $1 +; MIPS32-NEXT: lw $2, 0($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: addiu $sp, $sp, 24 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop @@ -222,12 +217,12 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -32 ; MIPS32-NEXT: .cfi_def_cfa_offset 32 +; MIPS32-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: ldc1 $f0, 0($5) -; MIPS32-NEXT: ldc1 $f2, 0($6) -; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sdc1 $f0, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f2, 8($sp) # 8-byte Folded Spill +; MIPS32-NEXT: ldc1 $f0, 0($6) +; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill +; MIPS32-NEXT: andi $1, $4, 1 ; MIPS32-NEXT: bnez $1, $BB5_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry @@ -239,11 +234,11 @@ ; MIPS32-NEXT: j $BB5_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB5_3: # %cond.false -; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB5_4: # %cond.end +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 
8-byte Folded Reload -; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 32 ; MIPS32-NEXT: jr $ra @@ -270,21 +265,21 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $6, 8($sp) # 4-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB6_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB6_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB6_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB6_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB6_3: # %cond.false -; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB6_4: # %cond.end ; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload @@ -311,28 +306,28 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -16 ; MIPS32-NEXT: .cfi_def_cfa_offset 16 +; MIPS32-NEXT: sw $7, 4($sp) # 4-byte Folded Spill ; MIPS32-NEXT: lw $1, 0($5) -; MIPS32-NEXT: lw $2, 0($6) -; MIPS32-NEXT: andi $3, $4, 1 +; MIPS32-NEXT: sw $1, 8($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $1, 0($6) ; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $7, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB7_2 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: bnez $1, $BB7_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB7_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB7_2: # %cond.true -; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded 
Reload ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: j $BB7_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB7_3: # %cond.false -; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: $BB7_4: # %cond.end +; MIPS32-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 16 ; MIPS32-NEXT: jr $ra @@ -359,23 +354,23 @@ ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: addiu $sp, $sp, -24 ; MIPS32-NEXT: .cfi_def_cfa_offset 24 +; MIPS32-NEXT: sdc1 $f12, 8($sp) # 8-byte Folded Spill +; MIPS32-NEXT: sdc1 $f14, 16($sp) # 8-byte Folded Spill ; MIPS32-NEXT: addiu $1, $sp, 40 ; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: sdc1 $f12, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: sdc1 $f14, 8($sp) # 8-byte Folded Spill ; MIPS32-NEXT: bnez $1, $BB8_2 ; MIPS32-NEXT: nop ; MIPS32-NEXT: # %bb.1: # %entry ; MIPS32-NEXT: j $BB8_3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB8_2: # %cond.true -; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: j $BB8_4 ; MIPS32-NEXT: nop ; MIPS32-NEXT: $BB8_3: # %cond.false -; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload +; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: $BB8_4: # %cond.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/rem_and_div.ll @@ -6,11 +6,11 @@ ; MIPS32-LABEL: sdiv_i8: ; MIPS32: # %bb.0: # 
%entry ; MIPS32-NEXT: sll $1, $5, 24 +; MIPS32-NEXT: sra $2, $1, 24 +; MIPS32-NEXT: sll $1, $4, 24 ; MIPS32-NEXT: sra $1, $1, 24 -; MIPS32-NEXT: sll $2, $4, 24 -; MIPS32-NEXT: sra $2, $2, 24 -; MIPS32-NEXT: div $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: div $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mflo $1 ; MIPS32-NEXT: sll $1, $1, 24 ; MIPS32-NEXT: sra $2, $1, 24 @@ -25,11 +25,11 @@ ; MIPS32-LABEL: sdiv_i16: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sll $1, $5, 16 +; MIPS32-NEXT: sra $2, $1, 16 +; MIPS32-NEXT: sll $1, $4, 16 ; MIPS32-NEXT: sra $1, $1, 16 -; MIPS32-NEXT: sll $2, $4, 16 -; MIPS32-NEXT: sra $2, $2, 16 -; MIPS32-NEXT: div $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: div $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mflo $1 ; MIPS32-NEXT: sll $1, $1, 16 ; MIPS32-NEXT: sra $2, $1, 16 @@ -60,12 +60,12 @@ ; MIPS32-NEXT: .cfi_def_cfa_offset 32 ; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 -; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: move $4, $6 -; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: move $5, $7 -; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: jal __divdi3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload @@ -82,11 +82,11 @@ ; MIPS32-LABEL: srem_i8: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sll $1, $5, 24 +; MIPS32-NEXT: sra $2, $1, 24 +; MIPS32-NEXT: sll $1, $4, 24 ; MIPS32-NEXT: sra $1, $1, 24 -; MIPS32-NEXT: sll $2, $4, 24 -; MIPS32-NEXT: sra $2, $2, 24 -; MIPS32-NEXT: div $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: div $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 
7 ; MIPS32-NEXT: mflo $1 ; MIPS32-NEXT: sll $1, $1, 24 ; MIPS32-NEXT: sra $2, $1, 24 @@ -101,11 +101,11 @@ ; MIPS32-LABEL: srem_i16: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: sll $1, $5, 16 +; MIPS32-NEXT: sra $2, $1, 16 +; MIPS32-NEXT: sll $1, $4, 16 ; MIPS32-NEXT: sra $1, $1, 16 -; MIPS32-NEXT: sll $2, $4, 16 -; MIPS32-NEXT: sra $2, $2, 16 -; MIPS32-NEXT: div $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: div $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mfhi $1 ; MIPS32-NEXT: sll $1, $1, 16 ; MIPS32-NEXT: sra $2, $1, 16 @@ -136,12 +136,12 @@ ; MIPS32-NEXT: .cfi_def_cfa_offset 32 ; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 -; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: move $4, $6 -; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: move $5, $7 -; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: jal __moddi3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload @@ -157,10 +157,10 @@ define signext i8 @udiv_i8(i8 signext %a, i8 signext %b) { ; MIPS32-LABEL: udiv_i8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $5, 255 -; MIPS32-NEXT: andi $2, $4, 255 -; MIPS32-NEXT: divu $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: andi $2, $5, 255 +; MIPS32-NEXT: andi $1, $4, 255 +; MIPS32-NEXT: divu $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mflo $1 ; MIPS32-NEXT: sll $1, $1, 24 ; MIPS32-NEXT: sra $2, $1, 24 @@ -174,10 +174,10 @@ define signext i16 @udiv_i16(i16 signext %a, i16 signext %b) { ; MIPS32-LABEL: udiv_i16: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $5, 65535 -; MIPS32-NEXT: andi $2, $4, 65535 -; 
MIPS32-NEXT: divu $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: andi $2, $5, 65535 +; MIPS32-NEXT: andi $1, $4, 65535 +; MIPS32-NEXT: divu $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mflo $1 ; MIPS32-NEXT: sll $1, $1, 16 ; MIPS32-NEXT: sra $2, $1, 16 @@ -208,12 +208,12 @@ ; MIPS32-NEXT: .cfi_def_cfa_offset 32 ; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 -; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: move $4, $6 -; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: move $5, $7 -; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: jal __udivdi3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload @@ -229,10 +229,10 @@ define signext i8 @urem_i8(i8 signext %a, i8 signext %b) { ; MIPS32-LABEL: urem_i8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $5, 255 -; MIPS32-NEXT: andi $2, $4, 255 -; MIPS32-NEXT: divu $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: andi $2, $5, 255 +; MIPS32-NEXT: andi $1, $4, 255 +; MIPS32-NEXT: divu $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mfhi $1 ; MIPS32-NEXT: sll $1, $1, 24 ; MIPS32-NEXT: sra $2, $1, 24 @@ -246,10 +246,10 @@ define signext i16 @urem_i16(i16 signext %a, i16 signext %b) { ; MIPS32-LABEL: urem_i16: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $5, 65535 -; MIPS32-NEXT: andi $2, $4, 65535 -; MIPS32-NEXT: divu $zero, $1, $2 -; MIPS32-NEXT: teq $2, $zero, 7 +; MIPS32-NEXT: andi $2, $5, 65535 +; MIPS32-NEXT: andi $1, $4, 65535 +; MIPS32-NEXT: divu $zero, $2, $1 +; MIPS32-NEXT: teq $1, $zero, 7 ; MIPS32-NEXT: mfhi $1 ; MIPS32-NEXT: sll $1, $1, 16 ; MIPS32-NEXT: sra $2, $1, 16 
@@ -280,12 +280,12 @@ ; MIPS32-NEXT: .cfi_def_cfa_offset 32 ; MIPS32-NEXT: sw $ra, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 -; MIPS32-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: sw $5, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: move $4, $6 -; MIPS32-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MIPS32-NEXT: lw $6, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: move $5, $7 -; MIPS32-NEXT: lw $6, 24($sp) # 4-byte Folded Reload -; MIPS32-NEXT: lw $7, 20($sp) # 4-byte Folded Reload +; MIPS32-NEXT: lw $7, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: jal __umoddi3 ; MIPS32-NEXT: nop ; MIPS32-NEXT: lw $ra, 28($sp) # 4-byte Folded Reload Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/select.ll @@ -4,9 +4,9 @@ define i8 @select_i8(i1 %test, i8 %a, i8 %b) { ; MIPS32-LABEL: select_i8: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: movn $6, $5, $1 ; MIPS32-NEXT: move $2, $6 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: movn $2, $5, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -17,9 +17,9 @@ define i16 @select_i16(i1 %test, i16 %a, i16 %b) { ; MIPS32-LABEL: select_i16: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: movn $6, $5, $1 ; MIPS32-NEXT: move $2, $6 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: movn $2, $5, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -30,9 +30,9 @@ define i32 @select_i32(i1 %test, i32 %a, i32 %b) { ; MIPS32-LABEL: select_i32: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: movn $6, $5, $1 ; MIPS32-NEXT: move $2, $6 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: movn $2, $5, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -43,9 +43,9 @@ define i32* @select_ptr(i1 %test, i32* 
%a, i32* %b) { ; MIPS32-LABEL: select_ptr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: movn $6, $5, $1 ; MIPS32-NEXT: move $2, $6 +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: movn $2, $5, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -56,12 +56,12 @@ define i32 @select_with_negation(i32 %a, i32 %b, i32 %x, i32 %y) { ; MIPS32-LABEL: select_with_negation: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $1, $zero, 1 -; MIPS32-NEXT: slt $2, $4, $5 -; MIPS32-NEXT: xor $1, $2, $1 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn $7, $6, $1 ; MIPS32-NEXT: move $2, $7 +; MIPS32-NEXT: ori $3, $zero, 1 +; MIPS32-NEXT: slt $1, $4, $5 +; MIPS32-NEXT: xor $1, $1, $3 +; MIPS32-NEXT: andi $1, $1, 1 +; MIPS32-NEXT: movn $2, $6, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -74,19 +74,13 @@ define i64 @select_i64(i1 %test, i64 %a, i64 %b) { ; MIPS32-LABEL: select_i64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: addiu $sp, $sp, -8 -; MIPS32-NEXT: .cfi_def_cfa_offset 8 -; MIPS32-NEXT: addiu $1, $sp, 24 -; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 28 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: andi $3, $4, 1 -; MIPS32-NEXT: movn $1, $6, $3 -; MIPS32-NEXT: movn $2, $7, $3 -; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: move $2, $1 -; MIPS32-NEXT: lw $3, 4($sp) # 4-byte Folded Reload -; MIPS32-NEXT: addiu $sp, $sp, 8 +; MIPS32-NEXT: addiu $1, $sp, 16 +; MIPS32-NEXT: lw $2, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 20 +; MIPS32-NEXT: lw $3, 0($1) +; MIPS32-NEXT: andi $1, $4, 1 +; MIPS32-NEXT: movn $2, $6, $1 +; MIPS32-NEXT: movn $3, $7, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -97,11 +91,11 @@ define void @select_ambiguous_i64_in_fpr(i1 %test, i64* %i64_ptr_a, i64* %i64_ptr_b, i64* %i64_ptr_c) { ; MIPS32-LABEL: select_ambiguous_i64_in_fpr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ldc1 $f0, 0($5) -; MIPS32-NEXT: ldc1 $f2, 0($6) +; MIPS32-NEXT: ldc1 $f2, 0($5) +; MIPS32-NEXT: ldc1 $f0, 0($6) ; 
MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: movn.d $f2, $f0, $1 -; MIPS32-NEXT: sdc1 $f2, 0($7) +; MIPS32-NEXT: movn.d $f0, $f2, $1 +; MIPS32-NEXT: sdc1 $f0, 0($7) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -116,10 +110,9 @@ ; MIPS32-LABEL: select_float: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: andi $1, $4, 1 -; MIPS32-NEXT: mtc1 $5, $f0 -; MIPS32-NEXT: mtc1 $6, $f1 -; MIPS32-NEXT: movn.s $f1, $f0, $1 -; MIPS32-NEXT: mov.s $f0, $f1 +; MIPS32-NEXT: mtc1 $5, $f1 +; MIPS32-NEXT: mtc1 $6, $f0 +; MIPS32-NEXT: movn.s $f0, $f1, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -130,11 +123,11 @@ define void @select_ambiguous_float_in_gpr(i1 %test, float* %f32_ptr_a, float* %f32_ptr_b, float* %f32_ptr_c) { ; MIPS32-LABEL: select_ambiguous_float_in_gpr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($5) -; MIPS32-NEXT: lw $2, 0($6) +; MIPS32-NEXT: lw $2, 0($5) +; MIPS32-NEXT: lw $1, 0($6) ; MIPS32-NEXT: andi $3, $4, 1 -; MIPS32-NEXT: movn $2, $1, $3 -; MIPS32-NEXT: sw $2, 0($7) +; MIPS32-NEXT: movn $1, $2, $3 +; MIPS32-NEXT: sw $1, 0($7) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -148,11 +141,11 @@ define double @select_double(double %a, double %b, i1 %test) { ; MIPS32-LABEL: select_double: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: mov.d $f0, $f14 ; MIPS32-NEXT: addiu $1, $sp, 16 ; MIPS32-NEXT: lw $1, 0($1) ; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: movn.d $f14, $f12, $1 -; MIPS32-NEXT: mov.d $f0, $f14 +; MIPS32-NEXT: movn.d $f0, $f12, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sitofp_and_uitofp.ll @@ -143,10 +143,10 @@ ; FP32-NEXT: lui $1, 17200 ; FP32-NEXT: mtc1 $4, $f0 ; FP32-NEXT: mtc1 $1, $f1 -; FP32-NEXT: lui $1, 17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; 
FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: cvt.s.d $f0, $f0 ; FP32-NEXT: jr $ra @@ -157,10 +157,10 @@ ; FP64-NEXT: lui $1, 17200 ; FP64-NEXT: mtc1 $4, $f0 ; FP64-NEXT: mthc1 $1, $f0 -; FP64-NEXT: lui $1, 17200 -; FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: cvt.s.d $f0, $f0 ; FP64-NEXT: jr $ra @@ -177,10 +177,10 @@ ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: mtc1 $1, $f0 ; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: lui $1, 17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: cvt.s.d $f0, $f0 ; FP32-NEXT: jr $ra @@ -192,10 +192,10 @@ ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: mtc1 $1, $f0 ; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: lui $1, 17200 -; FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: cvt.s.d $f0, $f0 ; FP64-NEXT: jr $ra @@ -212,10 +212,10 @@ ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: mtc1 $1, $f0 ; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: lui $1, 17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: cvt.s.d $f0, $f0 ; FP32-NEXT: jr $ra @@ -227,10 +227,10 @@ ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: mtc1 $1, $f0 ; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: lui $1, 17200 -; 
FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: cvt.s.d $f0, $f0 ; FP64-NEXT: jr $ra @@ -264,10 +264,10 @@ ; FP32-NEXT: lui $1, 17200 ; FP32-NEXT: mtc1 $4, $f0 ; FP32-NEXT: mtc1 $1, $f1 -; FP32-NEXT: lui $1, 17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop @@ -277,10 +277,10 @@ ; FP64-NEXT: lui $1, 17200 ; FP64-NEXT: mtc1 $4, $f0 ; FP64-NEXT: mthc1 $1, $f0 -; FP64-NEXT: lui $1, 17200 -; FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop @@ -296,10 +296,10 @@ ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: mtc1 $1, $f0 ; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: lui $1, 17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop @@ -310,10 +310,10 @@ ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: mtc1 $1, $f0 ; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: lui $1, 17200 -; FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop @@ -329,10 +329,10 @@ ; FP32-NEXT: lui $2, 17200 ; FP32-NEXT: mtc1 $1, $f0 ; FP32-NEXT: mtc1 $2, $f1 -; FP32-NEXT: lui $1, 
17200 -; FP32-NEXT: ori $2, $zero, 0 -; FP32-NEXT: mtc1 $2, $f2 -; FP32-NEXT: mtc1 $1, $f3 +; FP32-NEXT: lui $2, 17200 +; FP32-NEXT: ori $1, $zero, 0 +; FP32-NEXT: mtc1 $1, $f2 +; FP32-NEXT: mtc1 $2, $f3 ; FP32-NEXT: sub.d $f0, $f0, $f2 ; FP32-NEXT: jr $ra ; FP32-NEXT: nop @@ -343,10 +343,10 @@ ; FP64-NEXT: lui $2, 17200 ; FP64-NEXT: mtc1 $1, $f0 ; FP64-NEXT: mthc1 $2, $f0 -; FP64-NEXT: lui $1, 17200 -; FP64-NEXT: ori $2, $zero, 0 -; FP64-NEXT: mtc1 $2, $f1 -; FP64-NEXT: mthc1 $1, $f1 +; FP64-NEXT: lui $2, 17200 +; FP64-NEXT: ori $1, $zero, 0 +; FP64-NEXT: mtc1 $1, $f1 +; FP64-NEXT: mthc1 $2, $f1 ; FP64-NEXT: sub.d $f0, $f0, $f1 ; FP64-NEXT: jr $ra ; FP64-NEXT: nop Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_4_unaligned.ll @@ -15,10 +15,10 @@ ; MIPS32-LABEL: store_float_align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(float_align1) -; MIPS32-NEXT: addiu $1, $1, %lo(float_align1) -; MIPS32-NEXT: mfc1 $2, $f12 -; MIPS32-NEXT: swl $2, 3($1) -; MIPS32-NEXT: swr $2, 0($1) +; MIPS32-NEXT: addiu $2, $1, %lo(float_align1) +; MIPS32-NEXT: mfc1 $1, $f12 +; MIPS32-NEXT: swl $1, 3($2) +; MIPS32-NEXT: swr $1, 0($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -37,10 +37,10 @@ ; MIPS32-LABEL: store_float_align2: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(float_align2) -; MIPS32-NEXT: addiu $1, $1, %lo(float_align2) -; MIPS32-NEXT: mfc1 $2, $f12 -; MIPS32-NEXT: swl $2, 3($1) -; MIPS32-NEXT: swr $2, 0($1) +; MIPS32-NEXT: addiu $2, $1, %lo(float_align2) +; MIPS32-NEXT: mfc1 $1, $f12 +; MIPS32-NEXT: swl $1, 3($2) +; MIPS32-NEXT: swr $1, 0($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll =================================================================== --- 
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/store_split_because_of_memsize_or_align.ll @@ -204,12 +204,12 @@ ; MIPS32-LABEL: store6align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 4 -; MIPS32-NEXT: addu $1, $4, $1 +; MIPS32-NEXT: addu $2, $4, $1 ; MIPS32-NEXT: swl $6, 3($4) ; MIPS32-NEXT: swr $6, 0($4) ; MIPS32-NEXT: sb $7, 4($4) -; MIPS32-NEXT: srl $2, $7, 8 -; MIPS32-NEXT: sb $2, 1($1) +; MIPS32-NEXT: srl $1, $7, 8 +; MIPS32-NEXT: sb $1, 1($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -290,25 +290,25 @@ ; MIPS32-LABEL: store7align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 4 -; MIPS32-NEXT: addu $1, $4, $1 +; MIPS32-NEXT: addu $2, $4, $1 ; MIPS32-NEXT: swl $6, 3($4) ; MIPS32-NEXT: swr $6, 0($4) ; MIPS32-NEXT: sb $7, 4($4) -; MIPS32-NEXT: srl $2, $7, 8 -; MIPS32-NEXT: sb $2, 1($1) -; MIPS32-NEXT: srl $2, $7, 16 -; MIPS32-NEXT: sb $2, 2($1) +; MIPS32-NEXT: srl $1, $7, 8 +; MIPS32-NEXT: sb $1, 1($2) +; MIPS32-NEXT: srl $1, $7, 16 +; MIPS32-NEXT: sb $1, 2($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: store7align1: ; MIPS32R6: # %bb.0: # %entry ; MIPS32R6-NEXT: ori $1, $zero, 4 -; MIPS32R6-NEXT: addu $1, $4, $1 +; MIPS32R6-NEXT: addu $2, $4, $1 ; MIPS32R6-NEXT: sw $6, 0($4) ; MIPS32R6-NEXT: sh $7, 4($4) -; MIPS32R6-NEXT: srl $2, $7, 16 -; MIPS32R6-NEXT: sb $2, 2($1) +; MIPS32R6-NEXT: srl $1, $7, 16 +; MIPS32R6-NEXT: sb $1, 2($2) ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align1* %S to i56* @@ -321,23 +321,23 @@ ; MIPS32-LABEL: store7align2: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 4 -; MIPS32-NEXT: addu $1, $4, $1 +; MIPS32-NEXT: addu $2, $4, $1 ; MIPS32-NEXT: swl $6, 3($4) ; MIPS32-NEXT: swr $6, 0($4) ; MIPS32-NEXT: sh $7, 4($4) -; MIPS32-NEXT: srl $2, $7, 16 -; MIPS32-NEXT: sb $2, 2($1) +; MIPS32-NEXT: srl $1, $7, 16 +; MIPS32-NEXT: sb $1, 2($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; 
MIPS32R6-LABEL: store7align2: ; MIPS32R6: # %bb.0: # %entry ; MIPS32R6-NEXT: ori $1, $zero, 4 -; MIPS32R6-NEXT: addu $1, $4, $1 +; MIPS32R6-NEXT: addu $2, $4, $1 ; MIPS32R6-NEXT: sw $6, 0($4) ; MIPS32R6-NEXT: sh $7, 4($4) -; MIPS32R6-NEXT: srl $2, $7, 16 -; MIPS32R6-NEXT: sb $2, 2($1) +; MIPS32R6-NEXT: srl $1, $7, 16 +; MIPS32R6-NEXT: sb $1, 2($2) ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align2* %S to i56* @@ -350,22 +350,22 @@ ; MIPS32-LABEL: store7align4: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 4 -; MIPS32-NEXT: addu $1, $4, $1 +; MIPS32-NEXT: addu $2, $4, $1 ; MIPS32-NEXT: sw $6, 0($4) ; MIPS32-NEXT: sh $7, 4($4) -; MIPS32-NEXT: srl $2, $7, 16 -; MIPS32-NEXT: sb $2, 2($1) +; MIPS32-NEXT: srl $1, $7, 16 +; MIPS32-NEXT: sb $1, 2($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: store7align4: ; MIPS32R6: # %bb.0: # %entry ; MIPS32R6-NEXT: ori $1, $zero, 4 -; MIPS32R6-NEXT: addu $1, $4, $1 +; MIPS32R6-NEXT: addu $2, $4, $1 ; MIPS32R6-NEXT: sw $6, 0($4) ; MIPS32R6-NEXT: sh $7, 4($4) -; MIPS32R6-NEXT: srl $2, $7, 16 -; MIPS32R6-NEXT: sb $2, 2($1) +; MIPS32R6-NEXT: srl $1, $7, 16 +; MIPS32R6-NEXT: sb $1, 2($2) ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align4* %S to i56* @@ -378,22 +378,22 @@ ; MIPS32-LABEL: store7align8: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: ori $1, $zero, 4 -; MIPS32-NEXT: addu $1, $4, $1 +; MIPS32-NEXT: addu $2, $4, $1 ; MIPS32-NEXT: sw $6, 0($4) ; MIPS32-NEXT: sh $7, 4($4) -; MIPS32-NEXT: srl $2, $7, 16 -; MIPS32-NEXT: sb $2, 2($1) +; MIPS32-NEXT: srl $1, $7, 16 +; MIPS32-NEXT: sb $1, 2($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; ; MIPS32R6-LABEL: store7align8: ; MIPS32R6: # %bb.0: # %entry ; MIPS32R6-NEXT: ori $1, $zero, 4 -; MIPS32R6-NEXT: addu $1, $4, $1 +; MIPS32R6-NEXT: addu $2, $4, $1 ; MIPS32R6-NEXT: sw $6, 0($4) ; MIPS32R6-NEXT: sh $7, 4($4) -; MIPS32R6-NEXT: srl $2, $7, 16 -; MIPS32R6-NEXT: sb $2, 2($1) +; MIPS32R6-NEXT: srl $1, $7, 16 +; MIPS32R6-NEXT: sb $1, 
2($2) ; MIPS32R6-NEXT: jrc $ra entry: %0 = bitcast %struct.MemSize7_Align8* %S to i56* @@ -406,13 +406,13 @@ ; MIPS32-LABEL: store_double_align1: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align1) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align1) -; MIPS32-NEXT: mfc1 $2, $f12 -; MIPS32-NEXT: mfc1 $3, $f13 -; MIPS32-NEXT: swl $2, 3($1) -; MIPS32-NEXT: swr $2, 0($1) -; MIPS32-NEXT: swl $3, 7($1) -; MIPS32-NEXT: swr $3, 4($1) +; MIPS32-NEXT: addiu $2, $1, %lo(double_align1) +; MIPS32-NEXT: mfc1 $3, $f12 +; MIPS32-NEXT: mfc1 $1, $f13 +; MIPS32-NEXT: swl $3, 3($2) +; MIPS32-NEXT: swr $3, 0($2) +; MIPS32-NEXT: swl $1, 7($2) +; MIPS32-NEXT: swr $1, 4($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -431,13 +431,13 @@ ; MIPS32-LABEL: store_double_align2: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align2) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align2) -; MIPS32-NEXT: mfc1 $2, $f12 -; MIPS32-NEXT: mfc1 $3, $f13 -; MIPS32-NEXT: swl $2, 3($1) -; MIPS32-NEXT: swr $2, 0($1) -; MIPS32-NEXT: swl $3, 7($1) -; MIPS32-NEXT: swr $3, 4($1) +; MIPS32-NEXT: addiu $2, $1, %lo(double_align2) +; MIPS32-NEXT: mfc1 $3, $f12 +; MIPS32-NEXT: mfc1 $1, $f13 +; MIPS32-NEXT: swl $3, 3($2) +; MIPS32-NEXT: swr $3, 0($2) +; MIPS32-NEXT: swl $1, 7($2) +; MIPS32-NEXT: swr $1, 4($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; @@ -456,11 +456,11 @@ ; MIPS32-LABEL: store_double_align4: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: lui $1, %hi(double_align4) -; MIPS32-NEXT: addiu $1, $1, %lo(double_align4) -; MIPS32-NEXT: mfc1 $2, $f12 -; MIPS32-NEXT: mfc1 $3, $f13 -; MIPS32-NEXT: sw $2, 0($1) -; MIPS32-NEXT: sw $3, 4($1) +; MIPS32-NEXT: addiu $2, $1, %lo(double_align4) +; MIPS32-NEXT: mfc1 $3, $f12 +; MIPS32-NEXT: mfc1 $1, $f13 +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: sw $1, 4($2) ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop ; Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll =================================================================== --- 
llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub.ll @@ -88,10 +88,10 @@ ; MIPS32-LABEL: sub_i64: ; MIPS32: # %bb.0: # %entry ; MIPS32-NEXT: subu $2, $6, $4 -; MIPS32-NEXT: sltu $1, $6, $4 -; MIPS32-NEXT: subu $3, $7, $5 -; MIPS32-NEXT: andi $1, $1, 1 -; MIPS32-NEXT: subu $3, $3, $1 +; MIPS32-NEXT: sltu $3, $6, $4 +; MIPS32-NEXT: subu $1, $7, $5 +; MIPS32-NEXT: andi $3, $3, 1 +; MIPS32-NEXT: subu $3, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -102,38 +102,37 @@ define i128 @sub_i128(i128 %a, i128 %b) { ; MIPS32-LABEL: sub_i128: ; MIPS32: # %bb.0: # %entry +; MIPS32-NEXT: move $10, $5 +; MIPS32-NEXT: move $9, $6 ; MIPS32-NEXT: addiu $1, $sp, 16 +; MIPS32-NEXT: lw $3, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 20 +; MIPS32-NEXT: lw $6, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 24 +; MIPS32-NEXT: lw $5, 0($1) +; MIPS32-NEXT: addiu $1, $sp, 28 ; MIPS32-NEXT: lw $1, 0($1) -; MIPS32-NEXT: addiu $2, $sp, 20 -; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: addiu $3, $sp, 24 -; MIPS32-NEXT: lw $3, 0($3) -; MIPS32-NEXT: addiu $8, $sp, 28 -; MIPS32-NEXT: lw $8, 0($8) -; MIPS32-NEXT: subu $9, $1, $4 -; MIPS32-NEXT: sltu $1, $1, $4 -; MIPS32-NEXT: subu $4, $2, $5 -; MIPS32-NEXT: andi $10, $1, 1 -; MIPS32-NEXT: subu $4, $4, $10 -; MIPS32-NEXT: xor $10, $2, $5 -; MIPS32-NEXT: sltiu $10, $10, 1 -; MIPS32-NEXT: sltu $2, $2, $5 -; MIPS32-NEXT: andi $5, $10, 1 -; MIPS32-NEXT: movn $2, $1, $5 -; MIPS32-NEXT: subu $1, $3, $6 -; MIPS32-NEXT: andi $5, $2, 1 -; MIPS32-NEXT: subu $1, $1, $5 -; MIPS32-NEXT: xor $5, $3, $6 -; MIPS32-NEXT: sltiu $5, $5, 1 -; MIPS32-NEXT: sltu $3, $3, $6 +; MIPS32-NEXT: subu $2, $3, $4 +; MIPS32-NEXT: sltu $4, $3, $4 +; MIPS32-NEXT: subu $3, $6, $10 +; MIPS32-NEXT: andi $8, $4, 1 +; MIPS32-NEXT: subu $3, $3, $8 +; MIPS32-NEXT: xor $8, $6, $10 +; MIPS32-NEXT: sltiu $8, $8, 1 +; MIPS32-NEXT: sltu $6, $6, $10 +; MIPS32-NEXT: andi $8, $8, 1 +; MIPS32-NEXT: movn $6, $4, $8 +; MIPS32-NEXT: subu $4, $5, $9 +; 
MIPS32-NEXT: andi $8, $6, 1 +; MIPS32-NEXT: subu $4, $4, $8 +; MIPS32-NEXT: xor $8, $5, $9 +; MIPS32-NEXT: sltiu $8, $8, 1 +; MIPS32-NEXT: sltu $5, $5, $9 +; MIPS32-NEXT: andi $8, $8, 1 +; MIPS32-NEXT: movn $5, $6, $8 +; MIPS32-NEXT: subu $1, $1, $7 ; MIPS32-NEXT: andi $5, $5, 1 -; MIPS32-NEXT: movn $3, $2, $5 -; MIPS32-NEXT: subu $2, $8, $7 -; MIPS32-NEXT: andi $3, $3, 1 -; MIPS32-NEXT: subu $5, $2, $3 -; MIPS32-NEXT: move $2, $9 -; MIPS32-NEXT: move $3, $4 -; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: subu $5, $1, $5 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/sub_vec.ll @@ -4,9 +4,9 @@ define void @sub_v16i8(<16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %c) { ; P5600-LABEL: sub_v16i8: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.b $w0, 0($4) -; P5600-NEXT: ld.b $w1, 0($5) -; P5600-NEXT: subv.b $w0, $w1, $w0 +; P5600-NEXT: ld.b $w1, 0($4) +; P5600-NEXT: ld.b $w0, 0($5) +; P5600-NEXT: subv.b $w0, $w0, $w1 ; P5600-NEXT: st.b $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -21,9 +21,9 @@ define void @sub_v8i16(<8 x i16>* %a, <8 x i16>* %b, <8 x i16>* %c) { ; P5600-LABEL: sub_v8i16: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.h $w0, 0($4) -; P5600-NEXT: ld.h $w1, 0($5) -; P5600-NEXT: subv.h $w0, $w1, $w0 +; P5600-NEXT: ld.h $w1, 0($4) +; P5600-NEXT: ld.h $w0, 0($5) +; P5600-NEXT: subv.h $w0, $w0, $w1 ; P5600-NEXT: st.h $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -38,9 +38,9 @@ define void @sub_v4i32(<4 x i32>* %a, <4 x i32>* %b, <4 x i32>* %c) { ; P5600-LABEL: sub_v4i32: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.w $w0, 0($4) -; P5600-NEXT: ld.w $w1, 0($5) -; P5600-NEXT: subv.w $w0, $w1, $w0 +; P5600-NEXT: ld.w $w1, 0($4) +; P5600-NEXT: ld.w $w0, 0($5) +; P5600-NEXT: subv.w $w0, $w0, $w1 ; P5600-NEXT: st.w $w0, 0($6) ; 
P5600-NEXT: jr $ra ; P5600-NEXT: nop @@ -55,9 +55,9 @@ define void @sub_v2i64(<2 x i64>* %a, <2 x i64>* %b, <2 x i64>* %c) { ; P5600-LABEL: sub_v2i64: ; P5600: # %bb.0: # %entry -; P5600-NEXT: ld.d $w0, 0($4) -; P5600-NEXT: ld.d $w1, 0($5) -; P5600-NEXT: subv.d $w0, $w1, $w0 +; P5600-NEXT: ld.d $w1, 0($4) +; P5600-NEXT: ld.d $w0, 0($5) +; P5600-NEXT: subv.d $w0, $w0, $w1 ; P5600-NEXT: st.d $w0, 0($6) ; P5600-NEXT: jr $ra ; P5600-NEXT: nop Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/test_TypeInfoforMF.ll @@ -26,9 +26,9 @@ define i32 @outgoing_gpr_instr(i32* %i32_ptr1, i32* %i32_ptr2) { ; MIPS32-LABEL: outgoing_gpr_instr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: lw $2, 0($5) -; MIPS32-NEXT: addu $2, $2, $1 +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: lw $1, 0($5) +; MIPS32-NEXT: addu $2, $1, $2 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -56,10 +56,10 @@ define i32 @incoming_gpr(i32 %incoming_phys_reg, i1 %test, i32* %a) { ; MIPS32-LABEL: incoming_gpr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($6) -; MIPS32-NEXT: andi $2, $5, 1 -; MIPS32-NEXT: movn $4, $1, $2 ; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: lw $1, 0($6) +; MIPS32-NEXT: andi $3, $5, 1 +; MIPS32-NEXT: movn $2, $1, $3 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -71,10 +71,10 @@ define float @incoming_fpr(float %incoming_phys_reg, i1 %test, float* %a) { ; MIPS32-LABEL: incoming_fpr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lwc1 $f0, 0($6) -; MIPS32-NEXT: andi $1, $5, 1 -; MIPS32-NEXT: movn.s $f12, $f0, $1 ; MIPS32-NEXT: mov.s $f0, $f12 +; MIPS32-NEXT: lwc1 $f1, 0($6) +; MIPS32-NEXT: andi $1, $5, 1 +; MIPS32-NEXT: movn.s $f0, $f1, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -103,11 +103,10 @@ define float @incoming_float_instr(float 
%val1, float %val2, float* %float_ptr, i1 %test) { ; MIPS32-LABEL: incoming_float_instr: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lwc1 $f0, 0($6) -; MIPS32-NEXT: add.s $f1, $f14, $f12 +; MIPS32-NEXT: lwc1 $f1, 0($6) +; MIPS32-NEXT: add.s $f0, $f14, $f12 ; MIPS32-NEXT: andi $1, $7, 1 -; MIPS32-NEXT: movn.s $f1, $f0, $1 -; MIPS32-NEXT: mov.s $f0, $f1 +; MIPS32-NEXT: movn.s $f0, $f1, $1 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/var_arg.ll @@ -13,6 +13,7 @@ ; MIPS32-NEXT: .cfi_def_cfa_offset 40 ; MIPS32-NEXT: sw $ra, 36($sp) # 4-byte Folded Spill ; MIPS32-NEXT: .cfi_offset 31, -4 +; MIPS32-NEXT: move $3, $4 ; MIPS32-NEXT: addiu $1, $sp, 44 ; MIPS32-NEXT: sw $5, 0($1) ; MIPS32-NEXT: addiu $1, $sp, 48 @@ -20,24 +21,23 @@ ; MIPS32-NEXT: addiu $1, $sp, 52 ; MIPS32-NEXT: sw $7, 0($1) ; MIPS32-NEXT: lui $1, %hi($.str) -; MIPS32-NEXT: addiu $1, $1, %lo($.str) -; MIPS32-NEXT: addiu $2, $sp, 32 -; MIPS32-NEXT: addiu $3, $sp, 28 +; MIPS32-NEXT: addiu $4, $1, %lo($.str) +; MIPS32-NEXT: addiu $6, $sp, 32 +; MIPS32-NEXT: addiu $2, $sp, 28 ; MIPS32-NEXT: addiu $5, $sp, 24 -; MIPS32-NEXT: addiu $6, $sp, 20 -; MIPS32-NEXT: sw $4, 0($2) -; MIPS32-NEXT: addiu $2, $sp, 44 -; MIPS32-NEXT: sw $2, 0($3) -; MIPS32-NEXT: lw $2, 0($3) +; MIPS32-NEXT: addiu $1, $sp, 20 +; MIPS32-NEXT: sw $3, 0($6) +; MIPS32-NEXT: addiu $3, $sp, 44 +; MIPS32-NEXT: sw $3, 0($2) +; MIPS32-NEXT: lw $2, 0($2) ; MIPS32-NEXT: sw $2, 0($5) ; MIPS32-NEXT: lw $2, 0($5) ; MIPS32-NEXT: ori $3, $zero, 4 ; MIPS32-NEXT: addu $3, $2, $3 ; MIPS32-NEXT: sw $3, 0($5) ; MIPS32-NEXT: lw $2, 0($2) -; MIPS32-NEXT: sw $2, 0($6) -; MIPS32-NEXT: lw $5, 0($6) -; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: sw $2, 0($1) +; MIPS32-NEXT: lw $5, 0($1) ; MIPS32-NEXT: jal printf ; MIPS32-NEXT: nop ; MIPS32-NEXT: 
lw $ra, 36($sp) # 4-byte Folded Reload Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zextLoad_and_sextLoad.ll @@ -113,9 +113,8 @@ define i64 @load4_s32_to_sextLoad4_s64(i32* %px) { ; MIPS32-LABEL: load4_s32_to_sextLoad4_s64: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $1, 0($4) -; MIPS32-NEXT: sra $3, $1, 31 -; MIPS32-NEXT: move $2, $1 +; MIPS32-NEXT: lw $2, 0($4) +; MIPS32-NEXT: sra $3, $2, 31 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/zext_and_sext.ll @@ -4,8 +4,8 @@ define i64 @zext(i32 %x) { ; MIPS32-LABEL: zext: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: ori $3, $zero, 0 ; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: ori $3, $zero, 0 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: @@ -16,8 +16,8 @@ define i64 @sext(i32 %x) { ; MIPS32-LABEL: sext: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: sra $3, $4, 31 ; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: sra $3, $2, 31 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop entry: Index: llvm/test/CodeGen/Mips/atomic-min-max.ll =================================================================== --- llvm/test/CodeGen/Mips/atomic-min-max.ll +++ llvm/test/CodeGen/Mips/atomic-min-max.ll @@ -829,38 +829,38 @@ ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 2 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 65535 -; MIPS-NEXT: sllv 
$4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 2 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 65535 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB4_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: slt $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movn $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB4_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: slt $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movn $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB4_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -869,38 +869,38 @@ ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 2 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 65535 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; 
MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 2 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 65535 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB4_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: slt $11, $8, $5 -; MIPSR6-NEXT: seleqz $9, $8, $11 -; MIPSR6-NEXT: selnez $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB4_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: slt $5, $2, $7 +; MIPSR6-NEXT: seleqz $3, $2, $5 +; MIPSR6-NEXT: selnez $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB4_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -908,37 +908,37 @@ ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 2 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 65535 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; 
MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 2 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 65535 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB4_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: slt $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movn $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB4_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: slt $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movn $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB4_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -946,38 +946,38 @@ ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 2 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 65535 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 2 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 65535 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor 
$9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB4_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: slt $11, $8, $5 -; MMR6-NEXT: seleqz $9, $8, $11 -; MMR6-NEXT: selnez $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB4_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: slt $5, $2, $7 +; MMR6-NEXT: seleqz $3, $2, $5 +; MMR6-NEXT: selnez $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB4_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -985,39 +985,39 @@ ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 65535 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 65535 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB4_1: # 
%entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: slt $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movn $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB4_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: slt $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movn $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB4_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -1026,39 +1026,39 @@ ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 65535 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 65535 +; 
MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB4_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: slt $11, $8, $5 -; MIPSELR6-NEXT: seleqz $9, $8, $11 -; MIPSELR6-NEXT: selnez $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB4_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: slt $5, $2, $7 +; MIPSELR6-NEXT: seleqz $3, $2, $5 +; MIPSELR6-NEXT: selnez $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB4_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -1066,38 +1066,38 @@ ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 65535 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: 
sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 65535 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB4_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: slt $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movn $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB4_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: slt $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movn $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB4_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -1105,39 +1105,39 @@ ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 65535 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 
+; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 65535 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB4_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: slt $11, $8, $5 -; MMELR6-NEXT: seleqz $9, $8, $11 -; MMELR6-NEXT: selnez $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB4_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: slt $5, $2, $7 +; MMELR6-NEXT: seleqz $3, $2, $5 +; MMELR6-NEXT: selnez $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB4_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -1145,38 +1145,38 @@ ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; 
MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 2 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 65535 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 65535 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB4_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB4_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: slt $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movn $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB4_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -1185,38 +1185,38 @@ ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; 
MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 2 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 65535 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 65535 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB4_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB4_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: slt $5, $2, $7 +; MIPS64R6-NEXT: seleqz $3, $2, $5 +; MIPS64R6-NEXT: selnez $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB4_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -1224,39 +1224,39 @@ ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; 
MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 65535 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 65535 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB4_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB4_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: slt $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movn $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB4_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -1265,39 +1265,39 @@ ; MIPS64ELR6: # %bb.0: # %entry 
; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 65535 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 65535 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB4_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB4_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: slt $5, $2, $7 +; MIPS64ELR6-NEXT: seleqz $3, $2, $5 +; MIPS64ELR6-NEXT: selnez $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB4_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: 
sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -1310,38 +1310,38 @@ ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 2 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 65535 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 2 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 65535 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB5_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: slt $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movz $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB5_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: slt $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movz $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB5_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte 
Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -1350,38 +1350,38 @@ ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 2 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 65535 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 2 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 65535 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB5_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: slt $11, $8, $5 -; MIPSR6-NEXT: selnez $9, $8, $11 -; MIPSR6-NEXT: seleqz $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB5_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: slt $5, $2, $7 +; MIPSR6-NEXT: selnez $3, $2, $5 +; MIPSR6-NEXT: seleqz $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB5_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) 
# 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -1389,37 +1389,37 @@ ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 2 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 65535 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 2 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 65535 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB5_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: slt $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movz $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB5_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: slt $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movz $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB5_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; 
@@ -1427,38 +1427,38 @@ ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 2 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 65535 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 2 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 65535 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB5_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: slt $11, $8, $5 -; MMR6-NEXT: selnez $9, $8, $11 -; MMR6-NEXT: seleqz $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB5_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: slt $5, $2, $7 +; MMR6-NEXT: selnez $3, $2, $5 +; MMR6-NEXT: seleqz $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB5_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -1466,39 +1466,39 @@ ; MIPSEL: # %bb.0: # %entry ; 
MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 65535 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 65535 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB5_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: slt $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movz $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB5_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: slt $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movz $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB5_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr 
$ra ; MIPSEL-NEXT: nop @@ -1507,39 +1507,39 @@ ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 65535 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 65535 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB5_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: slt $11, $8, $5 -; MIPSELR6-NEXT: selnez $9, $8, $11 -; MIPSELR6-NEXT: seleqz $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB5_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: slt $5, $2, $7 +; MIPSELR6-NEXT: selnez $3, $2, $5 +; MIPSELR6-NEXT: seleqz $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB5_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 
4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -1547,38 +1547,38 @@ ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 65535 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 65535 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB5_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: slt $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movz $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB5_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: slt $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movz $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB5_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 
4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -1586,39 +1586,39 @@ ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 65535 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 65535 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB5_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: slt $11, $8, $5 -; MMELR6-NEXT: selnez $9, $8, $11 -; MMELR6-NEXT: seleqz $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB5_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: slt $5, $2, $7 +; MMELR6-NEXT: selnez $3, $2, $5 +; MMELR6-NEXT: seleqz $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB5_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh 
$1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -1626,38 +1626,38 @@ ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 2 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 65535 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 65535 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB5_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB5_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: slt $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movz $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB5_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; 
MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -1666,38 +1666,38 @@ ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 2 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 65535 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 65535 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB5_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB5_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: slt $5, $2, $7 +; MIPS64R6-NEXT: selnez $3, $2, $5 +; MIPS64R6-NEXT: seleqz $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB5_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, 
$6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -1705,39 +1705,39 @@ ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 65535 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 65535 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB5_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB5_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: slt $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movz $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; 
MIPS64EL-NEXT: beqz $4, .LBB5_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -1746,39 +1746,39 @@ ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 65535 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 65535 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB5_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB5_1 +; MIPS64ELR6-NEXT: 
ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: slt $5, $2, $7 +; MIPS64ELR6-NEXT: selnez $3, $2, $5 +; MIPS64ELR6-NEXT: seleqz $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB5_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -1791,38 +1791,38 @@ ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 2 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 65535 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 2 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 65535 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB6_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: sltu $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movn $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; 
MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB6_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: sltu $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movn $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB6_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -1831,38 +1831,38 @@ ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 2 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 65535 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 2 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 65535 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB6_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: sltu $11, $8, $5 -; MIPSR6-NEXT: seleqz $9, $8, $11 -; MIPSR6-NEXT: selnez $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; 
MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB6_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: sltu $5, $2, $7 +; MIPSR6-NEXT: seleqz $3, $2, $5 +; MIPSR6-NEXT: selnez $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB6_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -1870,37 +1870,37 @@ ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 2 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 65535 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 2 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 65535 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB6_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: sltu $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movn $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc 
$10, $BB6_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: sltu $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movn $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB6_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -1908,38 +1908,38 @@ ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 2 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 65535 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 2 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 65535 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB6_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: sltu $11, $8, $5 -; MMR6-NEXT: seleqz $9, $8, $11 -; MMR6-NEXT: selnez $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB6_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: sltu $5, $2, $7 +; MMR6-NEXT: seleqz $3, $2, $5 +; MMR6-NEXT: selnez $5, $7, $5 +; MMR6-NEXT: 
or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB6_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -1947,39 +1947,39 @@ ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 65535 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 65535 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB6_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: sltu $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movn $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB6_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: sltu $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; 
MIPSEL-NEXT: movn $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB6_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -1988,39 +1988,39 @@ ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 65535 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 65535 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB6_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: sltu $11, $8, $5 -; MIPSELR6-NEXT: seleqz $9, $8, $11 -; MIPSELR6-NEXT: selnez $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) 
-; MIPSELR6-NEXT: beqzc $10, $BB6_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: sltu $5, $2, $7 +; MIPSELR6-NEXT: seleqz $3, $2, $5 +; MIPSELR6-NEXT: selnez $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB6_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -2028,38 +2028,38 @@ ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 65535 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 65535 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB6_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: sltu $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movn $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; 
MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB6_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: sltu $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movn $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB6_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -2067,39 +2067,39 @@ ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 65535 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 65535 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB6_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: sltu $11, $8, $5 -; MMELR6-NEXT: seleqz $9, $8, $11 -; MMELR6-NEXT: selnez $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; 
MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB6_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: sltu $5, $2, $7 +; MMELR6-NEXT: seleqz $3, $2, $5 +; MMELR6-NEXT: selnez $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB6_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -2107,38 +2107,38 @@ ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 2 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 65535 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 65535 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB6_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; 
MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB6_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: sltu $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movn $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB6_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -2147,38 +2147,38 @@ ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 2 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 65535 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 65535 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB6_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; 
MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB6_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: sltu $5, $2, $7 +; MIPS64R6-NEXT: seleqz $3, $2, $5 +; MIPS64R6-NEXT: selnez $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB6_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -2186,39 +2186,39 @@ ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 65535 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 65535 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; 
MIPS64EL-NEXT: .LBB6_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB6_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: sltu $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movn $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB6_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -2227,39 +2227,39 @@ ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 65535 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, 
$zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 65535 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB6_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB6_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: sltu $5, $2, $7 +; MIPS64ELR6-NEXT: seleqz $3, $2, $5 +; MIPS64ELR6-NEXT: selnez $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB6_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -2272,38 +2272,38 @@ ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; 
MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 2 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 65535 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 2 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 65535 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB7_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: sltu $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movz $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB7_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: sltu $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movz $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB7_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -2312,38 +2312,38 @@ ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; 
MIPSR6-NEXT: xori $3, $3, 2 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 65535 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 2 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 65535 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB7_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: sltu $11, $8, $5 -; MIPSR6-NEXT: selnez $9, $8, $11 -; MIPSR6-NEXT: seleqz $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB7_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: sltu $5, $2, $7 +; MIPSR6-NEXT: selnez $3, $2, $5 +; MIPSR6-NEXT: seleqz $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB7_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -2351,37 +2351,37 @@ ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; 
MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 2 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 65535 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 2 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 65535 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB7_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: sltu $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movz $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB7_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: sltu $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movz $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB7_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -2389,38 +2389,38 @@ ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 2 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 65535 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, 
-4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 2 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 65535 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB7_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: sltu $11, $8, $5 -; MMR6-NEXT: selnez $9, $8, $11 -; MMR6-NEXT: seleqz $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB7_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: sltu $5, $2, $7 +; MMR6-NEXT: selnez $3, $2, $5 +; MMR6-NEXT: seleqz $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB7_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -2428,39 +2428,39 @@ ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 65535 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: 
andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 65535 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB7_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: sltu $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movz $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB7_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: sltu $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movz $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB7_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -2469,39 +2469,39 @@ ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 65535 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; 
MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 65535 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB7_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: sltu $11, $8, $5 -; MIPSELR6-NEXT: selnez $9, $8, $11 -; MIPSELR6-NEXT: seleqz $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB7_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: sltu $5, $2, $7 +; MIPSELR6-NEXT: selnez $3, $2, $5 +; MIPSELR6-NEXT: seleqz $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB7_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -2509,38 +2509,38 @@ ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu 
$2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 65535 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 65535 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB7_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: sltu $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movz $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB7_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: sltu $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movz $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB7_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -2548,39 +2548,39 @@ ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: 
andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 65535 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 65535 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB7_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: sltu $11, $8, $5 -; MMELR6-NEXT: selnez $9, $8, $11 -; MMELR6-NEXT: seleqz $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB7_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: sltu $5, $2, $7 +; MMELR6-NEXT: selnez $3, $2, $5 +; MMELR6-NEXT: seleqz $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB7_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -2588,38 +2588,38 @@ ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 
killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 2 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 65535 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 65535 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB7_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB7_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: sltu $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movz $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB7_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -2628,38 +2628,38 @@ ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed 
$a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 2 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 65535 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 65535 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB7_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB7_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: sltu $5, $2, $7 +; MIPS64R6-NEXT: selnez $3, $2, $5 +; MIPS64R6-NEXT: seleqz $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB7_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -2667,39 +2667,39 @@ ; MIPS64EL: # %bb.0: 
# %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 65535 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 65535 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB7_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB7_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: sltu $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movz $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB7_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; 
MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -2708,39 +2708,39 @@ ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 65535 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 65535 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB7_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB7_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: sltu $5, $2, $7 +; MIPS64ELR6-NEXT: selnez $3, $2, $5 +; MIPS64ELR6-NEXT: seleqz $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB7_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; 
MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -2754,38 +2754,38 @@ ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 3 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 255 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 3 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 255 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB8_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: slt $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movn $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB8_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: slt $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movn $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB8_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; 
MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -2794,38 +2794,38 @@ ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 3 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 255 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 3 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 255 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB8_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: slt $11, $8, $5 -; MIPSR6-NEXT: seleqz $9, $8, $11 -; MIPSR6-NEXT: selnez $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB8_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: slt $5, $2, $7 +; MIPSR6-NEXT: seleqz $3, $2, $5 +; MIPSR6-NEXT: selnez $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB8_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, 
$8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -2833,37 +2833,37 @@ ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 3 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 255 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 3 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 255 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB8_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: slt $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movn $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB8_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: slt $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movn $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB8_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 
4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -2871,38 +2871,38 @@ ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 3 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 255 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 3 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 255 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB8_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: slt $11, $8, $5 -; MMR6-NEXT: seleqz $9, $8, $11 -; MMR6-NEXT: selnez $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB8_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: slt $5, $2, $7 +; MMR6-NEXT: seleqz $3, $2, $5 +; MMR6-NEXT: selnez $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB8_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; 
MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -2910,39 +2910,39 @@ ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 255 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 255 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB8_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: slt $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movn $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB8_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: slt $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movn $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB8_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; 
MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -2951,39 +2951,39 @@ ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 255 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 255 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB8_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: slt $11, $8, $5 -; MIPSELR6-NEXT: seleqz $9, $8, $11 -; MIPSELR6-NEXT: selnez $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB8_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: slt $5, $2, $7 +; MIPSELR6-NEXT: seleqz $3, $2, $5 +; MIPSELR6-NEXT: selnez $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB8_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; 
MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -2991,38 +2991,38 @@ ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 255 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 255 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB8_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: slt $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movn $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB8_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: slt $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movn $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB8_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, 
$7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -3030,39 +3030,39 @@ ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 255 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 255 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB8_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: slt $11, $8, $5 -; MMELR6-NEXT: seleqz $9, $8, $11 -; MMELR6-NEXT: selnez $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB8_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: slt $5, $2, $7 +; MMELR6-NEXT: seleqz $3, $2, $5 +; MMELR6-NEXT: selnez $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, 
$BB8_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -3070,38 +3070,38 @@ ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 3 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 255 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 255 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB8_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB8_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: slt $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movn $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB8_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; 
MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -3110,38 +3110,38 @@ ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 3 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 255 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 255 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB8_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB8_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: slt $5, $2, $7 +; MIPS64R6-NEXT: seleqz $3, $2, $5 +; MIPS64R6-NEXT: selnez $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: 
or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB8_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -3149,39 +3149,39 @@ ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 255 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 255 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB8_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB8_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: slt $5, $2, $7 
+; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movn $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB8_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -3190,39 +3190,39 @@ ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 255 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 255 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB8_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; 
MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB8_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: slt $5, $2, $7 +; MIPS64ELR6-NEXT: seleqz $3, $2, $5 +; MIPS64ELR6-NEXT: selnez $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB8_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -3235,38 +3235,38 @@ ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 3 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 255 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 3 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 255 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB9_1: # %entry ; 
MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: slt $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movz $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB9_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: slt $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movz $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB9_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -3275,38 +3275,38 @@ ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 3 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 255 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 3 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 255 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB9_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: 
ll $8, 0($2) -; MIPSR6-NEXT: slt $11, $8, $5 -; MIPSR6-NEXT: selnez $9, $8, $11 -; MIPSR6-NEXT: seleqz $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB9_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: slt $5, $2, $7 +; MIPSR6-NEXT: selnez $3, $2, $5 +; MIPSR6-NEXT: seleqz $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB9_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -3314,37 +3314,37 @@ ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 3 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 255 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 3 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 255 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB9_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: slt $11, $8, $5 -; MM-NEXT: 
or $9, $8, $zero -; MM-NEXT: movz $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB9_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: slt $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movz $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB9_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -3352,38 +3352,38 @@ ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 3 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 255 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 3 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 255 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB9_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: slt $11, $8, $5 -; MMR6-NEXT: selnez $9, $8, $11 -; MMR6-NEXT: seleqz $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; 
MMR6-NEXT: beqc $10, $zero, $BB9_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: slt $5, $2, $7 +; MMR6-NEXT: selnez $3, $2, $5 +; MMR6-NEXT: seleqz $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB9_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -3391,39 +3391,39 @@ ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 255 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 255 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB9_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: slt $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movz $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, 
$BB9_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: slt $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movz $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB9_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -3432,39 +3432,39 @@ ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 255 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 255 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB9_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: slt $11, $8, $5 -; MIPSELR6-NEXT: selnez $9, $8, $11 -; MIPSELR6-NEXT: seleqz $11, $5, $11 -; 
MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB9_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: slt $5, $2, $7 +; MIPSELR6-NEXT: selnez $3, $2, $5 +; MIPSELR6-NEXT: seleqz $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB9_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -3472,38 +3472,38 @@ ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 255 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 255 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB9_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; 
MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: slt $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movz $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB9_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: slt $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movz $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB9_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -3511,39 +3511,39 @@ ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 255 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 255 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB9_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; 
MMELR6-NEXT: slt $11, $8, $5 -; MMELR6-NEXT: selnez $9, $8, $11 -; MMELR6-NEXT: seleqz $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB9_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: slt $5, $2, $7 +; MMELR6-NEXT: selnez $3, $2, $5 +; MMELR6-NEXT: seleqz $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB9_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -3551,38 +3551,38 @@ ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 3 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 255 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 255 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB9_1: # 
%entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: slt $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB9_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: slt $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movz $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB9_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -3591,38 +3591,38 @@ ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 3 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 255 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 255 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; 
MIPS64R6-NEXT: .LBB9_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: slt $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB9_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: slt $5, $2, $7 +; MIPS64R6-NEXT: selnez $3, $2, $5 +; MIPS64R6-NEXT: seleqz $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB9_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -3630,39 +3630,39 @@ ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 255 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 255 +; 
MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB9_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB9_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: slt $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movz $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB9_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -3671,39 +3671,39 @@ ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; 
MIPS64ELR6-NEXT: ori $3, $zero, 255 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 255 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB9_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $10, $7, $5 -; MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB9_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: slt $5, $2, $7 +; MIPS64ELR6-NEXT: selnez $3, $2, $5 +; MIPS64ELR6-NEXT: seleqz $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB9_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -3716,38 +3716,38 @@ ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 
8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 3 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 255 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 3 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 255 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB10_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: sltu $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movn $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB10_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: sltu $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movn $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB10_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -3756,38 +3756,38 @@ ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; 
MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 3 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 255 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 3 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 255 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB10_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: sltu $11, $8, $5 -; MIPSR6-NEXT: seleqz $9, $8, $11 -; MIPSR6-NEXT: selnez $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB10_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: sltu $5, $2, $7 +; MIPSR6-NEXT: seleqz $3, $2, $5 +; MIPSR6-NEXT: selnez $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; MIPSR6-NEXT: beqzc $4, $BB10_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -3795,37 +3795,37 @@ ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: 
def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 3 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 255 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 3 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 255 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB10_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: sltu $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movn $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB10_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: sltu $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movn $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB10_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, $1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -3833,38 +3833,38 @@ ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 3 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 255 -; MMR6-NEXT: sllv $4, $4, 
$3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 3 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 255 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB10_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: sltu $11, $8, $5 -; MMR6-NEXT: seleqz $9, $8, $11 -; MMR6-NEXT: selnez $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB10_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: sltu $5, $2, $7 +; MMR6-NEXT: seleqz $3, $2, $5 +; MMR6-NEXT: selnez $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB10_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -3872,39 +3872,39 @@ ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 255 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv 
$5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 255 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB10_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: sltu $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movn $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB10_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: sltu $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movn $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB10_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -3913,39 +3913,39 @@ ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, 
$zero, 255 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 255 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB10_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: sltu $11, $8, $5 -; MIPSELR6-NEXT: seleqz $9, $8, $11 -; MIPSELR6-NEXT: selnez $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB10_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: sltu $5, $2, $7 +; MIPSELR6-NEXT: seleqz $3, $2, $5 +; MIPSELR6-NEXT: selnez $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: beqzc $4, $BB10_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -3953,38 +3953,38 @@ ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move 
$1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 255 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 255 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB10_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: sltu $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movn $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB10_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: sltu $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movn $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc $4, $BB10_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -3992,39 +3992,39 @@ ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync 
-; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 255 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 255 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB10_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: sltu $11, $8, $5 -; MMELR6-NEXT: seleqz $9, $8, $11 -; MMELR6-NEXT: selnez $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB10_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: sltu $5, $2, $7 +; MMELR6-NEXT: seleqz $3, $2, $5 +; MMELR6-NEXT: selnez $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, $2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB10_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -4032,38 +4032,38 @@ ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, 
$sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 3 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 255 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 255 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB10_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movn $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB10_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: sltu $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movn $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 0($6) +; MIPS64-NEXT: beqz $4, .LBB10_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -4072,38 +4072,38 @@ ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; 
MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 3 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 255 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 255 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB10_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: seleqz $8, $7, $10 -; MIPS64R6-NEXT: selnez $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB10_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: sltu $5, $2, $7 +; MIPS64R6-NEXT: seleqz $3, $2, $5 +; MIPS64R6-NEXT: selnez $5, $7, $5 +; MIPS64R6-NEXT: or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB10_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: 
daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -4111,39 +4111,39 @@ ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 255 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 255 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB10_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movn $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB10_1 +; MIPS64EL-NEXT: ll $2, 0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: sltu $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movn $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB10_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 
4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -4152,39 +4152,39 @@ ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 255 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 255 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB10_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; MIPS64ELR6-NEXT: seleqz $8, $7, $10 -; MIPS64ELR6-NEXT: selnez $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB10_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: sltu $5, $2, $7 +; MIPS64ELR6-NEXT: seleqz $3, $2, $5 +; MIPS64ELR6-NEXT: selnez $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB10_1 ; 
MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: @@ -4197,38 +4197,38 @@ ; MIPS: # %bb.0: # %entry ; MIPS-NEXT: addiu $sp, $sp, -8 ; MIPS-NEXT: .cfi_def_cfa_offset 8 -; MIPS-NEXT: move $1, $5 +; MIPS-NEXT: # kill: def $at killed $a1 ; MIPS-NEXT: sync -; MIPS-NEXT: addiu $2, $zero, -4 -; MIPS-NEXT: and $2, $4, $2 -; MIPS-NEXT: andi $3, $4, 3 -; MIPS-NEXT: xori $3, $3, 3 -; MIPS-NEXT: sll $3, $3, 3 -; MIPS-NEXT: ori $4, $zero, 255 -; MIPS-NEXT: sllv $4, $4, $3 -; MIPS-NEXT: nor $6, $zero, $4 -; MIPS-NEXT: sllv $5, $5, $3 +; MIPS-NEXT: addiu $1, $zero, -4 +; MIPS-NEXT: and $6, $4, $1 +; MIPS-NEXT: andi $1, $4, 3 +; MIPS-NEXT: xori $1, $1, 3 +; MIPS-NEXT: sll $10, $1, 3 +; MIPS-NEXT: ori $1, $zero, 255 +; MIPS-NEXT: sllv $8, $1, $10 +; MIPS-NEXT: nor $9, $zero, $8 +; MIPS-NEXT: sllv $7, $5, $10 ; MIPS-NEXT: $BB11_1: # %entry ; MIPS-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS-NEXT: ll $8, 0($2) -; MIPS-NEXT: sltu $11, $8, $5 -; MIPS-NEXT: move $9, $8 -; MIPS-NEXT: movz $9, $5, $11 -; MIPS-NEXT: and $9, $9, $4 -; MIPS-NEXT: and $10, $8, $6 -; MIPS-NEXT: or $10, $10, $9 -; MIPS-NEXT: sc $10, 0($2) -; MIPS-NEXT: beqz $10, $BB11_1 +; MIPS-NEXT: ll $2, 0($6) +; MIPS-NEXT: sltu $5, $2, $7 +; MIPS-NEXT: move $3, $2 +; MIPS-NEXT: movz $3, $7, $5 +; MIPS-NEXT: and $3, $3, $8 +; MIPS-NEXT: and $4, $2, $9 +; MIPS-NEXT: or $4, $4, $3 +; MIPS-NEXT: sc $4, 0($6) +; MIPS-NEXT: beqz $4, $BB11_1 ; MIPS-NEXT: nop ; MIPS-NEXT: # 
%bb.2: # %entry -; MIPS-NEXT: and $7, $8, $4 -; MIPS-NEXT: srlv $7, $7, $3 -; MIPS-NEXT: seh $7, $7 +; MIPS-NEXT: and $1, $2, $8 +; MIPS-NEXT: srlv $1, $1, $10 +; MIPS-NEXT: seh $1, $1 ; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: sync ; MIPS-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS-NEXT: sync ; MIPS-NEXT: addiu $sp, $sp, 8 ; MIPS-NEXT: jr $ra ; MIPS-NEXT: nop @@ -4237,38 +4237,38 @@ ; MIPSR6: # %bb.0: # %entry ; MIPSR6-NEXT: addiu $sp, $sp, -8 ; MIPSR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSR6-NEXT: move $1, $5 +; MIPSR6-NEXT: # kill: def $at killed $a1 ; MIPSR6-NEXT: sync -; MIPSR6-NEXT: addiu $2, $zero, -4 -; MIPSR6-NEXT: and $2, $4, $2 -; MIPSR6-NEXT: andi $3, $4, 3 -; MIPSR6-NEXT: xori $3, $3, 3 -; MIPSR6-NEXT: sll $3, $3, 3 -; MIPSR6-NEXT: ori $4, $zero, 255 -; MIPSR6-NEXT: sllv $4, $4, $3 -; MIPSR6-NEXT: nor $6, $zero, $4 -; MIPSR6-NEXT: sllv $5, $5, $3 +; MIPSR6-NEXT: addiu $1, $zero, -4 +; MIPSR6-NEXT: and $6, $4, $1 +; MIPSR6-NEXT: andi $1, $4, 3 +; MIPSR6-NEXT: xori $1, $1, 3 +; MIPSR6-NEXT: sll $10, $1, 3 +; MIPSR6-NEXT: ori $1, $zero, 255 +; MIPSR6-NEXT: sllv $8, $1, $10 +; MIPSR6-NEXT: nor $9, $zero, $8 +; MIPSR6-NEXT: sllv $7, $5, $10 ; MIPSR6-NEXT: $BB11_1: # %entry ; MIPSR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSR6-NEXT: ll $8, 0($2) -; MIPSR6-NEXT: sltu $11, $8, $5 -; MIPSR6-NEXT: selnez $9, $8, $11 -; MIPSR6-NEXT: seleqz $11, $5, $11 -; MIPSR6-NEXT: or $9, $9, $11 -; MIPSR6-NEXT: and $9, $9, $4 -; MIPSR6-NEXT: and $10, $8, $6 -; MIPSR6-NEXT: or $10, $10, $9 -; MIPSR6-NEXT: sc $10, 0($2) -; MIPSR6-NEXT: beqzc $10, $BB11_1 +; MIPSR6-NEXT: ll $2, 0($6) +; MIPSR6-NEXT: sltu $5, $2, $7 +; MIPSR6-NEXT: selnez $3, $2, $5 +; MIPSR6-NEXT: seleqz $5, $7, $5 +; MIPSR6-NEXT: or $3, $3, $5 +; MIPSR6-NEXT: and $3, $3, $8 +; MIPSR6-NEXT: and $4, $2, $9 +; MIPSR6-NEXT: or $4, $4, $3 +; MIPSR6-NEXT: sc $4, 0($6) +; 
MIPSR6-NEXT: beqzc $4, $BB11_1 ; MIPSR6-NEXT: # %bb.2: # %entry -; MIPSR6-NEXT: and $7, $8, $4 -; MIPSR6-NEXT: srlv $7, $7, $3 -; MIPSR6-NEXT: seh $7, $7 +; MIPSR6-NEXT: and $1, $2, $8 +; MIPSR6-NEXT: srlv $1, $1, $10 +; MIPSR6-NEXT: seh $1, $1 ; MIPSR6-NEXT: # %bb.3: # %entry -; MIPSR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSR6-NEXT: # %bb.4: # %entry -; MIPSR6-NEXT: sync ; MIPSR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSR6-NEXT: sync ; MIPSR6-NEXT: addiu $sp, $sp, 8 ; MIPSR6-NEXT: jrc $ra ; @@ -4276,37 +4276,37 @@ ; MM: # %bb.0: # %entry ; MM-NEXT: addiu $sp, $sp, -8 ; MM-NEXT: .cfi_def_cfa_offset 8 -; MM-NEXT: move $1, $5 +; MM-NEXT: # kill: def $at killed $a1 ; MM-NEXT: sync -; MM-NEXT: addiu $2, $zero, -4 -; MM-NEXT: and $2, $4, $2 -; MM-NEXT: andi $3, $4, 3 -; MM-NEXT: xori $3, $3, 3 -; MM-NEXT: sll $3, $3, 3 -; MM-NEXT: ori $4, $zero, 255 -; MM-NEXT: sllv $4, $4, $3 -; MM-NEXT: nor $6, $zero, $4 -; MM-NEXT: sllv $5, $5, $3 +; MM-NEXT: addiu $1, $zero, -4 +; MM-NEXT: and $6, $4, $1 +; MM-NEXT: andi $1, $4, 3 +; MM-NEXT: xori $1, $1, 3 +; MM-NEXT: sll $10, $1, 3 +; MM-NEXT: ori $1, $zero, 255 +; MM-NEXT: sllv $8, $1, $10 +; MM-NEXT: nor $9, $zero, $8 +; MM-NEXT: sllv $7, $5, $10 ; MM-NEXT: $BB11_1: # %entry ; MM-NEXT: # =>This Inner Loop Header: Depth=1 -; MM-NEXT: ll $8, 0($2) -; MM-NEXT: sltu $11, $8, $5 -; MM-NEXT: or $9, $8, $zero -; MM-NEXT: movz $9, $5, $11 -; MM-NEXT: and $9, $9, $4 -; MM-NEXT: and $10, $8, $6 -; MM-NEXT: or $10, $10, $9 -; MM-NEXT: sc $10, 0($2) -; MM-NEXT: beqzc $10, $BB11_1 +; MM-NEXT: ll $2, 0($6) +; MM-NEXT: sltu $5, $2, $7 +; MM-NEXT: or $3, $2, $zero +; MM-NEXT: movz $3, $7, $5 +; MM-NEXT: and $3, $3, $8 +; MM-NEXT: and $4, $2, $9 +; MM-NEXT: or $4, $4, $3 +; MM-NEXT: sc $4, 0($6) +; MM-NEXT: beqzc $4, $BB11_1 ; MM-NEXT: # %bb.2: # %entry -; MM-NEXT: and $7, $8, $4 -; MM-NEXT: srlv $7, $7, $3 -; MM-NEXT: seh $7, $7 +; MM-NEXT: and $1, $2, $8 +; MM-NEXT: srlv $1, 
$1, $10 +; MM-NEXT: seh $1, $1 ; MM-NEXT: # %bb.3: # %entry -; MM-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MM-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MM-NEXT: # %bb.4: # %entry -; MM-NEXT: sync ; MM-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MM-NEXT: sync ; MM-NEXT: addiusp 8 ; MM-NEXT: jrc $ra ; @@ -4314,38 +4314,38 @@ ; MMR6: # %bb.0: # %entry ; MMR6-NEXT: addiu $sp, $sp, -8 ; MMR6-NEXT: .cfi_def_cfa_offset 8 -; MMR6-NEXT: move $1, $5 +; MMR6-NEXT: # kill: def $at killed $a1 ; MMR6-NEXT: sync -; MMR6-NEXT: addiu $2, $zero, -4 -; MMR6-NEXT: and $2, $4, $2 -; MMR6-NEXT: andi $3, $4, 3 -; MMR6-NEXT: xori $3, $3, 3 -; MMR6-NEXT: sll $3, $3, 3 -; MMR6-NEXT: ori $4, $zero, 255 -; MMR6-NEXT: sllv $4, $4, $3 -; MMR6-NEXT: nor $6, $zero, $4 -; MMR6-NEXT: sllv $5, $5, $3 +; MMR6-NEXT: addiu $1, $zero, -4 +; MMR6-NEXT: and $6, $4, $1 +; MMR6-NEXT: andi $1, $4, 3 +; MMR6-NEXT: xori $1, $1, 3 +; MMR6-NEXT: sll $10, $1, 3 +; MMR6-NEXT: ori $1, $zero, 255 +; MMR6-NEXT: sllv $8, $1, $10 +; MMR6-NEXT: nor $9, $zero, $8 +; MMR6-NEXT: sllv $7, $5, $10 ; MMR6-NEXT: $BB11_1: # %entry ; MMR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMR6-NEXT: ll $8, 0($2) -; MMR6-NEXT: sltu $11, $8, $5 -; MMR6-NEXT: selnez $9, $8, $11 -; MMR6-NEXT: seleqz $11, $5, $11 -; MMR6-NEXT: or $9, $9, $11 -; MMR6-NEXT: and $9, $9, $4 -; MMR6-NEXT: and $10, $8, $6 -; MMR6-NEXT: or $10, $10, $9 -; MMR6-NEXT: sc $10, 0($2) -; MMR6-NEXT: beqc $10, $zero, $BB11_1 +; MMR6-NEXT: ll $2, 0($6) +; MMR6-NEXT: sltu $5, $2, $7 +; MMR6-NEXT: selnez $3, $2, $5 +; MMR6-NEXT: seleqz $5, $7, $5 +; MMR6-NEXT: or $3, $3, $5 +; MMR6-NEXT: and $3, $3, $8 +; MMR6-NEXT: and $4, $2, $9 +; MMR6-NEXT: or $4, $4, $3 +; MMR6-NEXT: sc $4, 0($6) +; MMR6-NEXT: beqc $4, $zero, $BB11_1 ; MMR6-NEXT: # %bb.2: # %entry -; MMR6-NEXT: and $7, $8, $4 -; MMR6-NEXT: srlv $7, $7, $3 -; MMR6-NEXT: seh $7, $7 +; MMR6-NEXT: and $1, $2, $8 +; MMR6-NEXT: srlv $1, $1, $10 +; MMR6-NEXT: seh $1, $1 ; MMR6-NEXT: # %bb.3: # %entry -; MMR6-NEXT: sw 
$7, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMR6-NEXT: # %bb.4: # %entry -; MMR6-NEXT: sync ; MMR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: sync ; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra ; @@ -4353,39 +4353,39 @@ ; MIPSEL: # %bb.0: # %entry ; MIPSEL-NEXT: addiu $sp, $sp, -8 ; MIPSEL-NEXT: .cfi_def_cfa_offset 8 -; MIPSEL-NEXT: move $1, $5 +; MIPSEL-NEXT: # kill: def $at killed $a1 ; MIPSEL-NEXT: sync -; MIPSEL-NEXT: addiu $2, $zero, -4 -; MIPSEL-NEXT: and $2, $4, $2 -; MIPSEL-NEXT: andi $3, $4, 3 -; MIPSEL-NEXT: sll $3, $3, 3 -; MIPSEL-NEXT: ori $4, $zero, 255 -; MIPSEL-NEXT: sllv $4, $4, $3 -; MIPSEL-NEXT: nor $6, $zero, $4 -; MIPSEL-NEXT: sllv $5, $5, $3 +; MIPSEL-NEXT: addiu $1, $zero, -4 +; MIPSEL-NEXT: and $6, $4, $1 +; MIPSEL-NEXT: andi $1, $4, 3 +; MIPSEL-NEXT: sll $10, $1, 3 +; MIPSEL-NEXT: ori $1, $zero, 255 +; MIPSEL-NEXT: sllv $8, $1, $10 +; MIPSEL-NEXT: nor $9, $zero, $8 +; MIPSEL-NEXT: sllv $7, $5, $10 ; MIPSEL-NEXT: $BB11_1: # %entry ; MIPSEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSEL-NEXT: ll $8, 0($2) -; MIPSEL-NEXT: and $8, $8, $4 -; MIPSEL-NEXT: and $5, $5, $4 -; MIPSEL-NEXT: sltu $11, $8, $5 -; MIPSEL-NEXT: move $9, $8 -; MIPSEL-NEXT: movz $9, $5, $11 -; MIPSEL-NEXT: and $9, $9, $4 -; MIPSEL-NEXT: and $10, $8, $6 -; MIPSEL-NEXT: or $10, $10, $9 -; MIPSEL-NEXT: sc $10, 0($2) -; MIPSEL-NEXT: beqz $10, $BB11_1 +; MIPSEL-NEXT: ll $2, 0($6) +; MIPSEL-NEXT: and $2, $2, $8 +; MIPSEL-NEXT: and $7, $7, $8 +; MIPSEL-NEXT: sltu $5, $2, $7 +; MIPSEL-NEXT: move $3, $2 +; MIPSEL-NEXT: movz $3, $7, $5 +; MIPSEL-NEXT: and $3, $3, $8 +; MIPSEL-NEXT: and $4, $2, $9 +; MIPSEL-NEXT: or $4, $4, $3 +; MIPSEL-NEXT: sc $4, 0($6) +; MIPSEL-NEXT: beqz $4, $BB11_1 ; MIPSEL-NEXT: nop ; MIPSEL-NEXT: # %bb.2: # %entry -; MIPSEL-NEXT: and $7, $8, $4 -; MIPSEL-NEXT: srlv $7, $7, $3 -; MIPSEL-NEXT: seh $7, $7 +; MIPSEL-NEXT: and $1, $2, $8 +; MIPSEL-NEXT: srlv $1, $1, $10 +; MIPSEL-NEXT: seh $1, $1 ; 
MIPSEL-NEXT: # %bb.3: # %entry -; MIPSEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSEL-NEXT: # %bb.4: # %entry -; MIPSEL-NEXT: sync ; MIPSEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSEL-NEXT: sync ; MIPSEL-NEXT: addiu $sp, $sp, 8 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: nop @@ -4394,39 +4394,39 @@ ; MIPSELR6: # %bb.0: # %entry ; MIPSELR6-NEXT: addiu $sp, $sp, -8 ; MIPSELR6-NEXT: .cfi_def_cfa_offset 8 -; MIPSELR6-NEXT: move $1, $5 +; MIPSELR6-NEXT: # kill: def $at killed $a1 ; MIPSELR6-NEXT: sync -; MIPSELR6-NEXT: addiu $2, $zero, -4 -; MIPSELR6-NEXT: and $2, $4, $2 -; MIPSELR6-NEXT: andi $3, $4, 3 -; MIPSELR6-NEXT: sll $3, $3, 3 -; MIPSELR6-NEXT: ori $4, $zero, 255 -; MIPSELR6-NEXT: sllv $4, $4, $3 -; MIPSELR6-NEXT: nor $6, $zero, $4 -; MIPSELR6-NEXT: sllv $5, $5, $3 +; MIPSELR6-NEXT: addiu $1, $zero, -4 +; MIPSELR6-NEXT: and $6, $4, $1 +; MIPSELR6-NEXT: andi $1, $4, 3 +; MIPSELR6-NEXT: sll $10, $1, 3 +; MIPSELR6-NEXT: ori $1, $zero, 255 +; MIPSELR6-NEXT: sllv $8, $1, $10 +; MIPSELR6-NEXT: nor $9, $zero, $8 +; MIPSELR6-NEXT: sllv $7, $5, $10 ; MIPSELR6-NEXT: $BB11_1: # %entry ; MIPSELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPSELR6-NEXT: ll $8, 0($2) -; MIPSELR6-NEXT: and $8, $8, $4 -; MIPSELR6-NEXT: and $5, $5, $4 -; MIPSELR6-NEXT: sltu $11, $8, $5 -; MIPSELR6-NEXT: selnez $9, $8, $11 -; MIPSELR6-NEXT: seleqz $11, $5, $11 -; MIPSELR6-NEXT: or $9, $9, $11 -; MIPSELR6-NEXT: and $9, $9, $4 -; MIPSELR6-NEXT: and $10, $8, $6 -; MIPSELR6-NEXT: or $10, $10, $9 -; MIPSELR6-NEXT: sc $10, 0($2) -; MIPSELR6-NEXT: beqzc $10, $BB11_1 +; MIPSELR6-NEXT: ll $2, 0($6) +; MIPSELR6-NEXT: and $2, $2, $8 +; MIPSELR6-NEXT: and $7, $7, $8 +; MIPSELR6-NEXT: sltu $5, $2, $7 +; MIPSELR6-NEXT: selnez $3, $2, $5 +; MIPSELR6-NEXT: seleqz $5, $7, $5 +; MIPSELR6-NEXT: or $3, $3, $5 +; MIPSELR6-NEXT: and $3, $3, $8 +; MIPSELR6-NEXT: and $4, $2, $9 +; MIPSELR6-NEXT: or $4, $4, $3 +; MIPSELR6-NEXT: sc $4, 0($6) +; MIPSELR6-NEXT: 
beqzc $4, $BB11_1 ; MIPSELR6-NEXT: # %bb.2: # %entry -; MIPSELR6-NEXT: and $7, $8, $4 -; MIPSELR6-NEXT: srlv $7, $7, $3 -; MIPSELR6-NEXT: seh $7, $7 +; MIPSELR6-NEXT: and $1, $2, $8 +; MIPSELR6-NEXT: srlv $1, $1, $10 +; MIPSELR6-NEXT: seh $1, $1 ; MIPSELR6-NEXT: # %bb.3: # %entry -; MIPSELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPSELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPSELR6-NEXT: # %bb.4: # %entry -; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPSELR6-NEXT: sync ; MIPSELR6-NEXT: addiu $sp, $sp, 8 ; MIPSELR6-NEXT: jrc $ra ; @@ -4434,38 +4434,38 @@ ; MMEL: # %bb.0: # %entry ; MMEL-NEXT: addiu $sp, $sp, -8 ; MMEL-NEXT: .cfi_def_cfa_offset 8 -; MMEL-NEXT: move $1, $5 +; MMEL-NEXT: # kill: def $at killed $a1 ; MMEL-NEXT: sync -; MMEL-NEXT: addiu $2, $zero, -4 -; MMEL-NEXT: and $2, $4, $2 -; MMEL-NEXT: andi $3, $4, 3 -; MMEL-NEXT: sll $3, $3, 3 -; MMEL-NEXT: ori $4, $zero, 255 -; MMEL-NEXT: sllv $4, $4, $3 -; MMEL-NEXT: nor $6, $zero, $4 -; MMEL-NEXT: sllv $5, $5, $3 +; MMEL-NEXT: addiu $1, $zero, -4 +; MMEL-NEXT: and $6, $4, $1 +; MMEL-NEXT: andi $1, $4, 3 +; MMEL-NEXT: sll $10, $1, 3 +; MMEL-NEXT: ori $1, $zero, 255 +; MMEL-NEXT: sllv $8, $1, $10 +; MMEL-NEXT: nor $9, $zero, $8 +; MMEL-NEXT: sllv $7, $5, $10 ; MMEL-NEXT: $BB11_1: # %entry ; MMEL-NEXT: # =>This Inner Loop Header: Depth=1 -; MMEL-NEXT: ll $8, 0($2) -; MMEL-NEXT: and $8, $8, $4 -; MMEL-NEXT: and $5, $5, $4 -; MMEL-NEXT: sltu $11, $8, $5 -; MMEL-NEXT: or $9, $8, $zero -; MMEL-NEXT: movz $9, $5, $11 -; MMEL-NEXT: and $9, $9, $4 -; MMEL-NEXT: and $10, $8, $6 -; MMEL-NEXT: or $10, $10, $9 -; MMEL-NEXT: sc $10, 0($2) -; MMEL-NEXT: beqzc $10, $BB11_1 +; MMEL-NEXT: ll $2, 0($6) +; MMEL-NEXT: and $2, $2, $8 +; MMEL-NEXT: and $7, $7, $8 +; MMEL-NEXT: sltu $5, $2, $7 +; MMEL-NEXT: or $3, $2, $zero +; MMEL-NEXT: movz $3, $7, $5 +; MMEL-NEXT: and $3, $3, $8 +; MMEL-NEXT: and $4, $2, $9 +; MMEL-NEXT: or $4, $4, $3 +; MMEL-NEXT: sc $4, 0($6) +; MMEL-NEXT: beqzc 
$4, $BB11_1 ; MMEL-NEXT: # %bb.2: # %entry -; MMEL-NEXT: and $7, $8, $4 -; MMEL-NEXT: srlv $7, $7, $3 -; MMEL-NEXT: seh $7, $7 +; MMEL-NEXT: and $1, $2, $8 +; MMEL-NEXT: srlv $1, $1, $10 +; MMEL-NEXT: seh $1, $1 ; MMEL-NEXT: # %bb.3: # %entry -; MMEL-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMEL-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMEL-NEXT: # %bb.4: # %entry -; MMEL-NEXT: sync ; MMEL-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMEL-NEXT: sync ; MMEL-NEXT: addiusp 8 ; MMEL-NEXT: jrc $ra ; @@ -4473,39 +4473,39 @@ ; MMELR6: # %bb.0: # %entry ; MMELR6-NEXT: addiu $sp, $sp, -8 ; MMELR6-NEXT: .cfi_def_cfa_offset 8 -; MMELR6-NEXT: move $1, $5 +; MMELR6-NEXT: # kill: def $at killed $a1 ; MMELR6-NEXT: sync -; MMELR6-NEXT: addiu $2, $zero, -4 -; MMELR6-NEXT: and $2, $4, $2 -; MMELR6-NEXT: andi $3, $4, 3 -; MMELR6-NEXT: sll $3, $3, 3 -; MMELR6-NEXT: ori $4, $zero, 255 -; MMELR6-NEXT: sllv $4, $4, $3 -; MMELR6-NEXT: nor $6, $zero, $4 -; MMELR6-NEXT: sllv $5, $5, $3 +; MMELR6-NEXT: addiu $1, $zero, -4 +; MMELR6-NEXT: and $6, $4, $1 +; MMELR6-NEXT: andi $1, $4, 3 +; MMELR6-NEXT: sll $10, $1, 3 +; MMELR6-NEXT: ori $1, $zero, 255 +; MMELR6-NEXT: sllv $8, $1, $10 +; MMELR6-NEXT: nor $9, $zero, $8 +; MMELR6-NEXT: sllv $7, $5, $10 ; MMELR6-NEXT: $BB11_1: # %entry ; MMELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MMELR6-NEXT: ll $8, 0($2) -; MMELR6-NEXT: and $8, $8, $4 -; MMELR6-NEXT: and $5, $5, $4 -; MMELR6-NEXT: sltu $11, $8, $5 -; MMELR6-NEXT: selnez $9, $8, $11 -; MMELR6-NEXT: seleqz $11, $5, $11 -; MMELR6-NEXT: or $9, $9, $11 -; MMELR6-NEXT: and $9, $9, $4 -; MMELR6-NEXT: and $10, $8, $6 -; MMELR6-NEXT: or $10, $10, $9 -; MMELR6-NEXT: sc $10, 0($2) -; MMELR6-NEXT: beqc $10, $zero, $BB11_1 +; MMELR6-NEXT: ll $2, 0($6) +; MMELR6-NEXT: and $2, $2, $8 +; MMELR6-NEXT: and $7, $7, $8 +; MMELR6-NEXT: sltu $5, $2, $7 +; MMELR6-NEXT: selnez $3, $2, $5 +; MMELR6-NEXT: seleqz $5, $7, $5 +; MMELR6-NEXT: or $3, $3, $5 +; MMELR6-NEXT: and $3, $3, $8 +; MMELR6-NEXT: and $4, 
$2, $9 +; MMELR6-NEXT: or $4, $4, $3 +; MMELR6-NEXT: sc $4, 0($6) +; MMELR6-NEXT: beqc $4, $zero, $BB11_1 ; MMELR6-NEXT: # %bb.2: # %entry -; MMELR6-NEXT: and $7, $8, $4 -; MMELR6-NEXT: srlv $7, $7, $3 -; MMELR6-NEXT: seh $7, $7 +; MMELR6-NEXT: and $1, $2, $8 +; MMELR6-NEXT: srlv $1, $1, $10 +; MMELR6-NEXT: seh $1, $1 ; MMELR6-NEXT: # %bb.3: # %entry -; MMELR6-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MMELR6-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MMELR6-NEXT: # %bb.4: # %entry -; MMELR6-NEXT: sync ; MMELR6-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MMELR6-NEXT: sync ; MMELR6-NEXT: addiu $sp, $sp, 8 ; MMELR6-NEXT: jrc $ra ; @@ -4513,38 +4513,38 @@ ; MIPS64: # %bb.0: # %entry ; MIPS64-NEXT: daddiu $sp, $sp, -16 ; MIPS64-NEXT: .cfi_def_cfa_offset 16 -; MIPS64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64-NEXT: move $1, $5 ; MIPS64-NEXT: sync -; MIPS64-NEXT: daddiu $1, $zero, -4 -; MIPS64-NEXT: and $1, $4, $1 +; MIPS64-NEXT: daddiu $2, $zero, -4 +; MIPS64-NEXT: and $6, $4, $2 ; MIPS64-NEXT: andi $2, $4, 3 ; MIPS64-NEXT: xori $2, $2, 3 -; MIPS64-NEXT: sll $2, $2, 3 -; MIPS64-NEXT: ori $3, $zero, 255 -; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $4, $zero, $3 -; MIPS64-NEXT: sllv $5, $5, $2 +; MIPS64-NEXT: sll $10, $2, 3 +; MIPS64-NEXT: ori $2, $zero, 255 +; MIPS64-NEXT: sllv $8, $2, $10 +; MIPS64-NEXT: nor $9, $zero, $8 +; MIPS64-NEXT: sllv $7, $1, $10 ; MIPS64-NEXT: .LBB11_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $7, 0($1) -; MIPS64-NEXT: sltu $10, $7, $5 -; MIPS64-NEXT: move $8, $7 -; MIPS64-NEXT: movz $8, $5, $10 -; MIPS64-NEXT: and $8, $8, $3 -; MIPS64-NEXT: and $9, $7, $4 -; MIPS64-NEXT: or $9, $9, $8 -; MIPS64-NEXT: sc $9, 0($1) -; MIPS64-NEXT: beqz $9, .LBB11_1 +; MIPS64-NEXT: ll $2, 0($6) +; MIPS64-NEXT: sltu $5, $2, $7 +; MIPS64-NEXT: move $3, $2 +; MIPS64-NEXT: movz $3, $7, $5 +; MIPS64-NEXT: and $3, $3, $8 +; MIPS64-NEXT: and $4, $2, $9 +; MIPS64-NEXT: or $4, $4, $3 +; MIPS64-NEXT: sc $4, 
0($6) +; MIPS64-NEXT: beqz $4, .LBB11_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $6, $7, $3 -; MIPS64-NEXT: srlv $6, $6, $2 -; MIPS64-NEXT: seh $6, $6 +; MIPS64-NEXT: and $1, $2, $8 +; MIPS64-NEXT: srlv $1, $1, $10 +; MIPS64-NEXT: seh $1, $1 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry -; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64-NEXT: sync ; MIPS64-NEXT: daddiu $sp, $sp, 16 ; MIPS64-NEXT: jr $ra ; MIPS64-NEXT: nop @@ -4553,38 +4553,38 @@ ; MIPS64R6: # %bb.0: # %entry ; MIPS64R6-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64R6-NEXT: move $1, $5 ; MIPS64R6-NEXT: sync -; MIPS64R6-NEXT: daddiu $1, $zero, -4 -; MIPS64R6-NEXT: and $1, $4, $1 +; MIPS64R6-NEXT: daddiu $2, $zero, -4 +; MIPS64R6-NEXT: and $6, $4, $2 ; MIPS64R6-NEXT: andi $2, $4, 3 ; MIPS64R6-NEXT: xori $2, $2, 3 -; MIPS64R6-NEXT: sll $2, $2, 3 -; MIPS64R6-NEXT: ori $3, $zero, 255 -; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $4, $zero, $3 -; MIPS64R6-NEXT: sllv $5, $5, $2 +; MIPS64R6-NEXT: sll $10, $2, 3 +; MIPS64R6-NEXT: ori $2, $zero, 255 +; MIPS64R6-NEXT: sllv $8, $2, $10 +; MIPS64R6-NEXT: nor $9, $zero, $8 +; MIPS64R6-NEXT: sllv $7, $1, $10 ; MIPS64R6-NEXT: .LBB11_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $7, 0($1) -; MIPS64R6-NEXT: sltu $10, $7, $5 -; MIPS64R6-NEXT: selnez $8, $7, $10 -; MIPS64R6-NEXT: seleqz $10, $5, $10 -; MIPS64R6-NEXT: or $8, $8, $10 -; MIPS64R6-NEXT: and $8, $8, $3 -; MIPS64R6-NEXT: and $9, $7, $4 -; MIPS64R6-NEXT: or $9, $9, $8 -; MIPS64R6-NEXT: sc $9, 0($1) -; MIPS64R6-NEXT: beqzc $9, .LBB11_1 +; MIPS64R6-NEXT: ll $2, 0($6) +; MIPS64R6-NEXT: sltu $5, $2, $7 +; MIPS64R6-NEXT: selnez $3, $2, $5 +; MIPS64R6-NEXT: seleqz $5, $7, $5 +; MIPS64R6-NEXT: 
or $3, $3, $5 +; MIPS64R6-NEXT: and $3, $3, $8 +; MIPS64R6-NEXT: and $4, $2, $9 +; MIPS64R6-NEXT: or $4, $4, $3 +; MIPS64R6-NEXT: sc $4, 0($6) +; MIPS64R6-NEXT: beqzc $4, .LBB11_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $6, $7, $3 -; MIPS64R6-NEXT: srlv $6, $6, $2 -; MIPS64R6-NEXT: seh $6, $6 +; MIPS64R6-NEXT: and $1, $2, $8 +; MIPS64R6-NEXT: srlv $1, $1, $10 +; MIPS64R6-NEXT: seh $1, $1 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry -; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6-NEXT: jrc $ra ; @@ -4592,39 +4592,39 @@ ; MIPS64EL: # %bb.0: # %entry ; MIPS64EL-NEXT: daddiu $sp, $sp, -16 ; MIPS64EL-NEXT: .cfi_def_cfa_offset 16 -; MIPS64EL-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64EL-NEXT: move $1, $5 ; MIPS64EL-NEXT: sync -; MIPS64EL-NEXT: daddiu $1, $zero, -4 -; MIPS64EL-NEXT: and $1, $4, $1 +; MIPS64EL-NEXT: daddiu $2, $zero, -4 +; MIPS64EL-NEXT: and $6, $4, $2 ; MIPS64EL-NEXT: andi $2, $4, 3 -; MIPS64EL-NEXT: sll $2, $2, 3 -; MIPS64EL-NEXT: ori $3, $zero, 255 -; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $4, $zero, $3 -; MIPS64EL-NEXT: sllv $5, $5, $2 +; MIPS64EL-NEXT: sll $10, $2, 3 +; MIPS64EL-NEXT: ori $2, $zero, 255 +; MIPS64EL-NEXT: sllv $8, $2, $10 +; MIPS64EL-NEXT: nor $9, $zero, $8 +; MIPS64EL-NEXT: sllv $7, $1, $10 ; MIPS64EL-NEXT: .LBB11_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $7, 0($1) -; MIPS64EL-NEXT: and $7, $7, $3 -; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $10, $7, $5 -; MIPS64EL-NEXT: move $8, $7 -; MIPS64EL-NEXT: movz $8, $5, $10 -; MIPS64EL-NEXT: and $8, $8, $3 -; MIPS64EL-NEXT: and $9, $7, $4 -; MIPS64EL-NEXT: or $9, $9, $8 -; MIPS64EL-NEXT: sc $9, 0($1) -; MIPS64EL-NEXT: beqz $9, .LBB11_1 +; MIPS64EL-NEXT: ll $2, 
0($6) +; MIPS64EL-NEXT: and $2, $2, $8 +; MIPS64EL-NEXT: and $7, $7, $8 +; MIPS64EL-NEXT: sltu $5, $2, $7 +; MIPS64EL-NEXT: move $3, $2 +; MIPS64EL-NEXT: movz $3, $7, $5 +; MIPS64EL-NEXT: and $3, $3, $8 +; MIPS64EL-NEXT: and $4, $2, $9 +; MIPS64EL-NEXT: or $4, $4, $3 +; MIPS64EL-NEXT: sc $4, 0($6) +; MIPS64EL-NEXT: beqz $4, .LBB11_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $6, $7, $3 -; MIPS64EL-NEXT: srlv $6, $6, $2 -; MIPS64EL-NEXT: seh $6, $6 +; MIPS64EL-NEXT: and $1, $2, $8 +; MIPS64EL-NEXT: srlv $1, $1, $10 +; MIPS64EL-NEXT: seh $1, $1 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry -; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: daddiu $sp, $sp, 16 ; MIPS64EL-NEXT: jr $ra ; MIPS64EL-NEXT: nop @@ -4633,39 +4633,39 @@ ; MIPS64ELR6: # %bb.0: # %entry ; MIPS64ELR6-NEXT: daddiu $sp, $sp, -16 ; MIPS64ELR6-NEXT: .cfi_def_cfa_offset 16 -; MIPS64ELR6-NEXT: # kill: def $a1 killed $a1 killed $a1_64 +; MIPS64ELR6-NEXT: move $1, $5 ; MIPS64ELR6-NEXT: sync -; MIPS64ELR6-NEXT: daddiu $1, $zero, -4 -; MIPS64ELR6-NEXT: and $1, $4, $1 +; MIPS64ELR6-NEXT: daddiu $2, $zero, -4 +; MIPS64ELR6-NEXT: and $6, $4, $2 ; MIPS64ELR6-NEXT: andi $2, $4, 3 -; MIPS64ELR6-NEXT: sll $2, $2, 3 -; MIPS64ELR6-NEXT: ori $3, $zero, 255 -; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $4, $zero, $3 -; MIPS64ELR6-NEXT: sllv $5, $5, $2 +; MIPS64ELR6-NEXT: sll $10, $2, 3 +; MIPS64ELR6-NEXT: ori $2, $zero, 255 +; MIPS64ELR6-NEXT: sllv $8, $2, $10 +; MIPS64ELR6-NEXT: nor $9, $zero, $8 +; MIPS64ELR6-NEXT: sllv $7, $1, $10 ; MIPS64ELR6-NEXT: .LBB11_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $7, 0($1) -; MIPS64ELR6-NEXT: and $7, $7, $3 -; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $10, $7, $5 -; 
MIPS64ELR6-NEXT: selnez $8, $7, $10 -; MIPS64ELR6-NEXT: seleqz $10, $5, $10 -; MIPS64ELR6-NEXT: or $8, $8, $10 -; MIPS64ELR6-NEXT: and $8, $8, $3 -; MIPS64ELR6-NEXT: and $9, $7, $4 -; MIPS64ELR6-NEXT: or $9, $9, $8 -; MIPS64ELR6-NEXT: sc $9, 0($1) -; MIPS64ELR6-NEXT: beqzc $9, .LBB11_1 +; MIPS64ELR6-NEXT: ll $2, 0($6) +; MIPS64ELR6-NEXT: and $2, $2, $8 +; MIPS64ELR6-NEXT: and $7, $7, $8 +; MIPS64ELR6-NEXT: sltu $5, $2, $7 +; MIPS64ELR6-NEXT: selnez $3, $2, $5 +; MIPS64ELR6-NEXT: seleqz $5, $7, $5 +; MIPS64ELR6-NEXT: or $3, $3, $5 +; MIPS64ELR6-NEXT: and $3, $3, $8 +; MIPS64ELR6-NEXT: and $4, $2, $9 +; MIPS64ELR6-NEXT: or $4, $4, $3 +; MIPS64ELR6-NEXT: sc $4, 0($6) +; MIPS64ELR6-NEXT: beqzc $4, .LBB11_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $6, $7, $3 -; MIPS64ELR6-NEXT: srlv $6, $6, $2 -; MIPS64ELR6-NEXT: seh $6, $6 +; MIPS64ELR6-NEXT: and $1, $2, $8 +; MIPS64ELR6-NEXT: srlv $1, $1, $10 +; MIPS64ELR6-NEXT: seh $1, $1 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry -; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: daddiu $sp, $sp, 16 ; MIPS64ELR6-NEXT: jrc $ra entry: Index: llvm/test/CodeGen/Mips/atomic.ll =================================================================== --- llvm/test/CodeGen/Mips/atomic.ll +++ llvm/test/CodeGen/Mips/atomic.ll @@ -57,13 +57,13 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB0_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: addu $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB0_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 
+; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB0_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -107,13 +107,13 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB0_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: addu $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB0_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB0_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -191,13 +191,13 @@ ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB0_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: addu $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB0_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: addu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -313,13 +313,13 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB1_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: subu $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB1_1 +; 
MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: subu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB1_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -363,13 +363,13 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB1_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: subu $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB1_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: subu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB1_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -447,13 +447,13 @@ ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB1_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: subu $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB1_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: subu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB1_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -569,13 +569,13 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB2_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: xor $3, $2, $4 -; 
MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB2_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: xor $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB2_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -619,13 +619,13 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB2_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: xor $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB2_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: xor $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB2_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -703,13 +703,13 @@ ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB2_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: xor $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB2_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: xor $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -824,13 +824,13 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB3_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; 
MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: or $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB3_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: or $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB3_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -874,13 +874,13 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB3_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: or $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB3_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: or $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB3_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -958,13 +958,13 @@ ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB3_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: or $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB3_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: or $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -1079,13 +1079,13 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB4_1: # %entry ; 
MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: and $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB4_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: and $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB4_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -1129,13 +1129,13 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB4_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: and $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB4_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: and $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB4_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -1213,13 +1213,13 @@ ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB4_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: and $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB4_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -1335,14 +1335,14 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; 
MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB5_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: and $3, $2, $4 -; MIPS32O0-NEXT: nor $3, $zero, $3 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB5_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: and $1, $2, $4 +; MIPS32O0-NEXT: nor $1, $zero, $1 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB5_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -1388,14 +1388,14 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB5_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: and $3, $2, $4 -; MIPS32R6O0-NEXT: nor $3, $zero, $3 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB5_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: and $1, $2, $4 +; MIPS32R6O0-NEXT: nor $1, $zero, $1 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB5_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -1477,14 +1477,14 @@ ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB5_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: and $3, $2, $4 -; MIPS64R6O0-NEXT: nor $3, $zero, $3 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB5_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: nor $1, $zero, $1 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: 
beqzc $1, .LBB5_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -1609,17 +1609,16 @@ ; MIPS32O0-NEXT: addiu $sp, $sp, -8 ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: sw $4, 4($sp) -; MIPS32O0-NEXT: lw $2, 4($sp) -; MIPS32O0-NEXT: lw $1, %got(x)($1) +; MIPS32O0-NEXT: lw $4, 4($sp) +; MIPS32O0-NEXT: lw $3, %got(x)($1) ; MIPS32O0-NEXT: $BB6_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $3, 0($1) -; MIPS32O0-NEXT: move $4, $2 -; MIPS32O0-NEXT: sc $4, 0($1) -; MIPS32O0-NEXT: beqz $4, $BB6_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: move $1, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB6_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: move $2, $3 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -1669,16 +1668,15 @@ ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 ; MIPS32R6O0-NEXT: sw $4, 4($sp) -; MIPS32R6O0-NEXT: lw $2, 4($sp) -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $4, 4($sp) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB6_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $3, 0($1) -; MIPS32R6O0-NEXT: move $4, $2 -; MIPS32R6O0-NEXT: sc $4, 0($1) -; MIPS32R6O0-NEXT: beqzc $4, $BB6_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: move $1, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB6_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: move $2, $3 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -1764,18 +1762,17 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicSwap32))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap32))) -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: sw $4, 12($sp) -; MIPS64R6O0-NEXT: lw $2, 12($sp) -; MIPS64R6O0-NEXT: ld $1, 
%got_disp(x)($1) +; MIPS64R6O0-NEXT: move $2, $4 +; MIPS64R6O0-NEXT: sw $2, 12($sp) +; MIPS64R6O0-NEXT: lw $4, 12($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB6_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: move $4, $2 -; MIPS64R6O0-NEXT: sc $4, 0($1) -; MIPS64R6O0-NEXT: beqzc $4, .LBB6_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -1912,24 +1909,23 @@ ; MIPS32O0-NEXT: addiu $sp, $sp, -8 ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: sw $5, 4($sp) -; MIPS32O0-NEXT: lw $2, 4($sp) -; MIPS32O0-NEXT: lw $1, %got(x)($1) -; MIPS32O0-NEXT: move $3, $4 +; MIPS32O0-NEXT: lw $6, 4($sp) +; MIPS32O0-NEXT: lw $3, %got(x)($1) +; MIPS32O0-NEXT: move $5, $4 ; MIPS32O0-NEXT: $BB7_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $5, 0($1) -; MIPS32O0-NEXT: bne $5, $3, $BB7_3 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: bne $2, $5, $BB7_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS32O0-NEXT: move $6, $2 -; MIPS32O0-NEXT: sc $6, 0($1) -; MIPS32O0-NEXT: beqz $6, $BB7_1 +; MIPS32O0-NEXT: move $1, $6 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB7_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB7_3: # %entry -; MIPS32O0-NEXT: xor $1, $5, $4 +; MIPS32O0-NEXT: xor $1, $2, $4 ; MIPS32O0-NEXT: sltiu $1, $1, 1 -; MIPS32O0-NEXT: move $2, $5 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -1986,19 +1982,18 @@ ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 ; MIPS32R6O0-NEXT: sw $5, 4($sp) -; MIPS32R6O0-NEXT: lw $2, 4($sp) -; MIPS32R6O0-NEXT: lw $1, %got(x)($1) +; MIPS32R6O0-NEXT: lw $5, 
4($sp) +; MIPS32R6O0-NEXT: lw $3, %got(x)($1) ; MIPS32R6O0-NEXT: $BB7_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $3, 0($1) -; MIPS32R6O0-NEXT: bnec $3, $4, $BB7_3 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: bnec $2, $4, $BB7_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS32R6O0-NEXT: move $5, $2 -; MIPS32R6O0-NEXT: sc $5, 0($1) -; MIPS32R6O0-NEXT: beqzc $5, $BB7_1 +; MIPS32R6O0-NEXT: move $1, $5 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB7_1 ; MIPS32R6O0-NEXT: $BB7_3: # %entry -; MIPS32R6O0-NEXT: move $2, $3 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -2100,21 +2095,20 @@ ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap32))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; MIPS64R6O0-NEXT: sw $5, 12($sp) -; MIPS64R6O0-NEXT: lw $2, 12($sp) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: move $2, $5 +; MIPS64R6O0-NEXT: sw $2, 12($sp) +; MIPS64R6O0-NEXT: lw $5, 12($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB7_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $3, 0($1) -; MIPS64R6O0-NEXT: bnec $3, $4, .LBB7_3 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS64R6O0-NEXT: move $5, $2 -; MIPS64R6O0-NEXT: sc $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB7_1 +; MIPS64R6O0-NEXT: move $1, $5 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 ; MIPS64R6O0-NEXT: .LBB7_3: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -2286,34 +2280,34 @@ ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: lw $1, %got(y)($1) ; 
MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 +; MIPS32O0-NEXT: and $5, $1, $2 ; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 -; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $5, $zero, $3 -; MIPS32O0-NEXT: sllv $4, $4, $1 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 255 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB8_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $7, 0($2) -; MIPS32O0-NEXT: addu $8, $7, $4 -; MIPS32O0-NEXT: and $8, $8, $3 -; MIPS32O0-NEXT: and $9, $7, $5 -; MIPS32O0-NEXT: or $9, $9, $8 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB8_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: addu $3, $2, $6 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB8_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $6, $7, $3 -; MIPS32O0-NEXT: srlv $6, $6, $1 -; MIPS32O0-NEXT: sll $6, $6, 24 -; MIPS32O0-NEXT: sra $6, $6, 24 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -2387,31 +2381,31 @@ ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def 
$v0 killed $a0 ; MIPS32R6O0-NEXT: lw $1, %got(y)($1) -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $1, $3 +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 ; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $5, $zero, 255 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 -; MIPS32R6O0-NEXT: nor $6, $zero, $5 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB8_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $8, 0($3) -; MIPS32R6O0-NEXT: addu $9, $8, $4 -; MIPS32R6O0-NEXT: and $9, $9, $5 -; MIPS32R6O0-NEXT: and $10, $8, $6 -; MIPS32R6O0-NEXT: or $10, $10, $9 -; MIPS32R6O0-NEXT: sc $10, 0($3) -; MIPS32R6O0-NEXT: beqzc $10, $BB8_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: addu $3, $2, $6 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB8_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $7, $8, $5 -; MIPS32R6O0-NEXT: srlv $7, $7, $1 -; MIPS32R6O0-NEXT: seb $7, $7 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry ; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 @@ -2554,33 +2548,33 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; 
MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB8_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: addu $8, $7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB8_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: addu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB8_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -2802,34 +2796,34 @@ ; MIPS32O0-NEXT: addu $1, $2, $25 
; MIPS32O0-NEXT: lw $1, %got(y)($1) ; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 +; MIPS32O0-NEXT: and $5, $1, $2 ; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 -; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $5, $zero, $3 -; MIPS32O0-NEXT: sllv $4, $4, $1 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 255 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB9_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $7, 0($2) -; MIPS32O0-NEXT: subu $8, $7, $4 -; MIPS32O0-NEXT: and $8, $8, $3 -; MIPS32O0-NEXT: and $9, $7, $5 -; MIPS32O0-NEXT: or $9, $9, $8 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB9_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: subu $3, $2, $6 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB9_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $6, $7, $3 -; MIPS32O0-NEXT: srlv $6, $6, $1 -; MIPS32O0-NEXT: sll $6, $6, 24 -; MIPS32O0-NEXT: sra $6, $6, 24 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -2903,31 +2897,31 @@ ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, 
$4 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ; MIPS32R6O0-NEXT: lw $1, %got(y)($1) -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $1, $3 +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 ; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $5, $zero, 255 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 -; MIPS32R6O0-NEXT: nor $6, $zero, $5 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB9_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $8, 0($3) -; MIPS32R6O0-NEXT: subu $9, $8, $4 -; MIPS32R6O0-NEXT: and $9, $9, $5 -; MIPS32R6O0-NEXT: and $10, $8, $6 -; MIPS32R6O0-NEXT: or $10, $10, $9 -; MIPS32R6O0-NEXT: sc $10, 0($3) -; MIPS32R6O0-NEXT: beqzc $10, $BB9_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: subu $3, $2, $6 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB9_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $7, $8, $5 -; MIPS32R6O0-NEXT: srlv $7, $7, $1 -; MIPS32R6O0-NEXT: seb $7, $7 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry ; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 @@ -3070,33 +3064,33 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) -; MIPS64R6O0-NEXT: # kill: def 
$a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadSub8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB9_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: subu $8, $7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB9_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: subu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB9_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3320,35 +3314,35 @@ ; 
MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: lw $1, %got(y)($1) ; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 +; MIPS32O0-NEXT: and $5, $1, $2 ; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 -; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $5, $zero, $3 -; MIPS32O0-NEXT: sllv $4, $4, $1 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 255 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB10_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $7, 0($2) -; MIPS32O0-NEXT: and $8, $7, $4 -; MIPS32O0-NEXT: nor $8, $zero, $8 -; MIPS32O0-NEXT: and $8, $8, $3 -; MIPS32O0-NEXT: and $9, $7, $5 -; MIPS32O0-NEXT: or $9, $9, $8 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB10_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: and $3, $2, $6 +; MIPS32O0-NEXT: nor $3, $zero, $3 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB10_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $6, $7, $3 -; MIPS32O0-NEXT: srlv $6, $6, $1 -; MIPS32O0-NEXT: sll $6, $6, 24 -; MIPS32O0-NEXT: sra $6, $6, 24 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -3424,32 +3418,32 @@ ; MIPS32R6O0-NEXT: addiu $2, $2, 
%lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ; MIPS32R6O0-NEXT: lw $1, %got(y)($1) -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $1, $3 +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 ; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $5, $zero, 255 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 -; MIPS32R6O0-NEXT: nor $6, $zero, $5 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB10_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $8, 0($3) -; MIPS32R6O0-NEXT: and $9, $8, $4 -; MIPS32R6O0-NEXT: nor $9, $zero, $9 -; MIPS32R6O0-NEXT: and $9, $9, $5 -; MIPS32R6O0-NEXT: and $10, $8, $6 -; MIPS32R6O0-NEXT: or $10, $10, $9 -; MIPS32R6O0-NEXT: sc $10, 0($3) -; MIPS32R6O0-NEXT: beqzc $10, $BB10_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: and $3, $2, $6 +; MIPS32R6O0-NEXT: nor $3, $zero, $3 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB10_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $7, $8, $5 -; MIPS32R6O0-NEXT: srlv $7, $7, $1 -; MIPS32R6O0-NEXT: seb $7, $7 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry ; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 @@ -3596,34 +3590,34 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; 
MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadNand8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB10_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: and $8, $7, $4 -; MIPS64R6O0-NEXT: nor $8, $zero, $8 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB10_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: and $3, $2, $6 +; MIPS64R6O0-NEXT: nor $3, $zero, $3 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB10_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # 
%bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3850,33 +3844,33 @@ ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: lw $1, %got(y)($1) ; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 +; MIPS32O0-NEXT: and $5, $1, $2 ; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 -; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $5, $zero, $3 -; MIPS32O0-NEXT: sllv $4, $4, $1 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 255 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, $9 ; MIPS32O0-NEXT: $BB11_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $7, 0($2) -; MIPS32O0-NEXT: and $8, $4, $3 -; MIPS32O0-NEXT: and $9, $7, $5 -; MIPS32O0-NEXT: or $9, $9, $8 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB11_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: and $3, $6, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB11_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $6, $7, $3 -; MIPS32O0-NEXT: srlv $6, $6, $1 -; MIPS32O0-NEXT: sll $6, $6, 24 -; MIPS32O0-NEXT: sra $6, $6, 24 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; 
MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -3948,30 +3942,30 @@ ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ; MIPS32R6O0-NEXT: lw $1, %got(y)($1) -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $1, $3 +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 ; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $5, $zero, 255 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 -; MIPS32R6O0-NEXT: nor $6, $zero, $5 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: ori $1, $zero, 255 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB11_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $8, 0($3) -; MIPS32R6O0-NEXT: and $9, $4, $5 -; MIPS32R6O0-NEXT: and $10, $8, $6 -; MIPS32R6O0-NEXT: or $10, $10, $9 -; MIPS32R6O0-NEXT: sc $10, 0($3) -; MIPS32R6O0-NEXT: beqzc $10, $BB11_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: and $3, $6, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB11_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $7, $8, $5 -; MIPS32R6O0-NEXT: srlv $7, $7, $1 -; MIPS32R6O0-NEXT: seb $7, $7 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry ; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seb $2, $1 @@ -4110,32 +4104,32 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: 
lui $1, %hi(%neg(%gp_rel(AtomicSwap8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap8))) -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicSwap8))) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(y)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 3 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 255 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB11_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: and $8, $4, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB11_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: and $3, $6, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB11_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seb $6, $6 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; 
MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -4354,42 +4348,44 @@ ; MIPS32O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addu $1, $2, $25 -; MIPS32O0-NEXT: lw $1, %got(y)($1) -; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 -; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 +; MIPS32O0-NEXT: addu $3, $2, $25 +; MIPS32O0-NEXT: move $1, $5 +; MIPS32O0-NEXT: move $2, $4 +; MIPS32O0-NEXT: lw $3, %got(y)($3) +; MIPS32O0-NEXT: addiu $4, $zero, -4 +; MIPS32O0-NEXT: and $4, $3, $4 +; MIPS32O0-NEXT: andi $3, $3, 3 +; MIPS32O0-NEXT: sll $9, $3, 3 ; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $6, $zero, $3 -; MIPS32O0-NEXT: andi $4, $4, 255 -; MIPS32O0-NEXT: sllv $4, $4, $1 -; MIPS32O0-NEXT: andi $5, $5, 255 -; MIPS32O0-NEXT: sllv $5, $5, $1 +; MIPS32O0-NEXT: sllv $5, $3, $9 +; MIPS32O0-NEXT: nor $7, $zero, $5 +; MIPS32O0-NEXT: andi $2, $2, 255 +; MIPS32O0-NEXT: sllv $6, $2, $9 +; MIPS32O0-NEXT: andi $1, $1, 255 +; MIPS32O0-NEXT: sllv $8, $1, $9 ; MIPS32O0-NEXT: $BB12_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $8, 0($2) -; MIPS32O0-NEXT: and $9, $8, $3 -; MIPS32O0-NEXT: bne $9, $4, $BB12_3 +; MIPS32O0-NEXT: ll $2, 0($4) +; MIPS32O0-NEXT: and $3, $2, $5 +; MIPS32O0-NEXT: bne $3, $6, $BB12_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS32O0-NEXT: and $8, $8, $6 -; MIPS32O0-NEXT: or $8, $8, $5 -; MIPS32O0-NEXT: sc $8, 0($2) -; MIPS32O0-NEXT: beqz $8, $BB12_1 +; MIPS32O0-NEXT: and $2, $2, $7 +; MIPS32O0-NEXT: or $2, $2, $8 +; MIPS32O0-NEXT: sc $2, 0($4) +; MIPS32O0-NEXT: beqz $2, $BB12_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB12_3: # %entry -; MIPS32O0-NEXT: srlv $7, $9, $1 -; MIPS32O0-NEXT: sll $7, $7, 24 -; MIPS32O0-NEXT: sra $7, $7, 24 +; MIPS32O0-NEXT: srlv $1, 
$3, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.4: # %entry -; MIPS32O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.5: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 -; MIPS32O0-NEXT: sra $2, $2, 24 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $2, $1, 24 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -4470,37 +4466,39 @@ ; MIPS32R6O0-NEXT: lui $2, %hi(_gp_disp) ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 -; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, $5 -; MIPS32R6O0-NEXT: move $3, $4 -; MIPS32R6O0-NEXT: lw $1, %got(y)($1) -; MIPS32R6O0-NEXT: addiu $6, $zero, -4 -; MIPS32R6O0-NEXT: and $6, $1, $6 -; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $7, $zero, 255 -; MIPS32R6O0-NEXT: sllv $7, $7, $1 -; MIPS32R6O0-NEXT: nor $8, $zero, $7 -; MIPS32R6O0-NEXT: andi $4, $4, 255 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 -; MIPS32R6O0-NEXT: andi $5, $5, 255 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 +; MIPS32R6O0-NEXT: addu $3, $2, $25 +; MIPS32R6O0-NEXT: move $1, $5 +; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def $a1 killed $at +; MIPS32R6O0-NEXT: # kill: def $a0 killed $v0 +; MIPS32R6O0-NEXT: lw $3, %got(y)($3) +; MIPS32R6O0-NEXT: addiu $4, $zero, -4 +; MIPS32R6O0-NEXT: and $4, $3, $4 +; MIPS32R6O0-NEXT: andi $3, $3, 3 +; MIPS32R6O0-NEXT: sll $9, $3, 3 +; MIPS32R6O0-NEXT: ori $3, $zero, 255 +; MIPS32R6O0-NEXT: sllv $5, $3, $9 +; MIPS32R6O0-NEXT: nor $7, $zero, $5 +; MIPS32R6O0-NEXT: andi $2, $2, 255 +; MIPS32R6O0-NEXT: sllv $6, $2, $9 +; MIPS32R6O0-NEXT: andi $1, $1, 255 +; MIPS32R6O0-NEXT: sllv $8, $1, $9 ; MIPS32R6O0-NEXT: $BB12_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $10, 0($6) -; MIPS32R6O0-NEXT: and 
$11, $10, $7 -; MIPS32R6O0-NEXT: bnec $11, $4, $BB12_3 +; MIPS32R6O0-NEXT: ll $2, 0($4) +; MIPS32R6O0-NEXT: and $3, $2, $5 +; MIPS32R6O0-NEXT: bnec $3, $6, $BB12_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS32R6O0-NEXT: and $10, $10, $8 -; MIPS32R6O0-NEXT: or $10, $10, $5 -; MIPS32R6O0-NEXT: sc $10, 0($6) -; MIPS32R6O0-NEXT: beqzc $10, $BB12_1 +; MIPS32R6O0-NEXT: and $2, $2, $7 +; MIPS32R6O0-NEXT: or $2, $2, $8 +; MIPS32R6O0-NEXT: sc $2, 0($4) +; MIPS32R6O0-NEXT: beqzc $2, $BB12_1 ; MIPS32R6O0-NEXT: $BB12_3: # %entry -; MIPS32R6O0-NEXT: srlv $9, $11, $1 -; MIPS32R6O0-NEXT: seb $9, $9 +; MIPS32R6O0-NEXT: srlv $1, $3, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: sw $9, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: # %entry ; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 @@ -4660,38 +4658,38 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicCmpSwap8))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8))) -; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 3 -; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: daddiu $3, $1, %lo(%neg(%gp_rel(AtomicCmpSwap8))) +; MIPS64R6O0-NEXT: move $1, $5 +; MIPS64R6O0-NEXT: move $2, $4 +; MIPS64R6O0-NEXT: ld $3, %got_disp(y)($3) +; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 +; MIPS64R6O0-NEXT: and $4, $3, $4 +; MIPS64R6O0-NEXT: andi $3, $3, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $9, $3, 3 ; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, 
$1 -; MIPS64R6O0-NEXT: nor $6, $zero, $3 -; MIPS64R6O0-NEXT: andi $4, $4, 255 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 -; MIPS64R6O0-NEXT: andi $5, $5, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $1 +; MIPS64R6O0-NEXT: sllv $5, $3, $9 +; MIPS64R6O0-NEXT: nor $7, $zero, $5 +; MIPS64R6O0-NEXT: andi $2, $2, 255 +; MIPS64R6O0-NEXT: sllv $6, $2, $9 +; MIPS64R6O0-NEXT: andi $1, $1, 255 +; MIPS64R6O0-NEXT: sllv $8, $1, $9 ; MIPS64R6O0-NEXT: .LBB12_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: and $9, $8, $3 -; MIPS64R6O0-NEXT: bnec $9, $4, .LBB12_3 +; MIPS64R6O0-NEXT: ll $2, 0($4) +; MIPS64R6O0-NEXT: and $3, $2, $5 +; MIPS64R6O0-NEXT: bnec $3, $6, .LBB12_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS64R6O0-NEXT: and $8, $8, $6 -; MIPS64R6O0-NEXT: or $8, $8, $5 -; MIPS64R6O0-NEXT: sc $8, 0($2) -; MIPS64R6O0-NEXT: beqzc $8, .LBB12_1 +; MIPS64R6O0-NEXT: and $2, $2, $7 +; MIPS64R6O0-NEXT: or $2, $2, $8 +; MIPS64R6O0-NEXT: sc $2, 0($4) +; MIPS64R6O0-NEXT: beqzc $2, .LBB12_1 ; MIPS64R6O0-NEXT: .LBB12_3: # %entry -; MIPS64R6O0-NEXT: srlv $7, $9, $1 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: srlv $1, $3, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.4: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 @@ -4935,44 +4933,47 @@ ; MIPS32O0-LABEL: AtomicCmpSwapRes8: ; MIPS32O0: # %bb.0: # %entry ; MIPS32O0-NEXT: addiu $sp, $sp, -8 -; MIPS32O0-NEXT: addiu $1, $zero, -4 -; MIPS32O0-NEXT: and $1, $4, $1 -; MIPS32O0-NEXT: andi $2, $4, 3 -; MIPS32O0-NEXT: sll $2, $2, 3 +; MIPS32O0-NEXT: move $1, $6 +; MIPS32O0-NEXT: move $2, $5 +; MIPS32O0-NEXT: move $3, $4 +; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: addiu $4, $zero, -4 +; 
MIPS32O0-NEXT: and $4, $3, $4 +; MIPS32O0-NEXT: andi $3, $3, 3 +; MIPS32O0-NEXT: sll $9, $3, 3 ; MIPS32O0-NEXT: ori $3, $zero, 255 -; MIPS32O0-NEXT: sllv $3, $3, $2 -; MIPS32O0-NEXT: nor $4, $zero, $3 -; MIPS32O0-NEXT: andi $7, $5, 255 -; MIPS32O0-NEXT: sllv $7, $7, $2 -; MIPS32O0-NEXT: andi $6, $6, 255 -; MIPS32O0-NEXT: sllv $6, $6, $2 +; MIPS32O0-NEXT: sllv $5, $3, $9 +; MIPS32O0-NEXT: nor $7, $zero, $5 +; MIPS32O0-NEXT: andi $2, $2, 255 +; MIPS32O0-NEXT: sllv $6, $2, $9 +; MIPS32O0-NEXT: andi $1, $1, 255 +; MIPS32O0-NEXT: sllv $8, $1, $9 ; MIPS32O0-NEXT: $BB13_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $9, 0($1) -; MIPS32O0-NEXT: and $10, $9, $3 -; MIPS32O0-NEXT: bne $10, $7, $BB13_3 +; MIPS32O0-NEXT: ll $2, 0($4) +; MIPS32O0-NEXT: and $3, $2, $5 +; MIPS32O0-NEXT: bne $3, $6, $BB13_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS32O0-NEXT: and $9, $9, $4 -; MIPS32O0-NEXT: or $9, $9, $6 -; MIPS32O0-NEXT: sc $9, 0($1) -; MIPS32O0-NEXT: beqz $9, $BB13_1 +; MIPS32O0-NEXT: and $2, $2, $7 +; MIPS32O0-NEXT: or $2, $2, $8 +; MIPS32O0-NEXT: sc $2, 0($4) +; MIPS32O0-NEXT: beqz $2, $BB13_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB13_3: # %entry -; MIPS32O0-NEXT: srlv $8, $10, $2 -; MIPS32O0-NEXT: sll $8, $8, 24 -; MIPS32O0-NEXT: sra $8, $8, 24 +; MIPS32O0-NEXT: srlv $1, $3, $9 +; MIPS32O0-NEXT: sll $1, $1, 24 +; MIPS32O0-NEXT: sra $1, $1, 24 ; MIPS32O0-NEXT: # %bb.4: # %entry -; MIPS32O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MIPS32O0-NEXT: sw $8, 0($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.5: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 24 +; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: sll $2, $2, 24 ; MIPS32O0-NEXT: sra $2, $2, 24 -; MIPS32O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: xor $2, $3, $2 -; 
MIPS32O0-NEXT: sltiu $2, $2, 1 +; MIPS32O0-NEXT: xor $1, $1, $2 +; MIPS32O0-NEXT: sltiu $2, $1, 1 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -5048,37 +5049,40 @@ ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: move $1, $6 ; MIPS32R6O0-NEXT: move $2, $5 -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $4, $3 -; MIPS32R6O0-NEXT: andi $4, $4, 3 -; MIPS32R6O0-NEXT: sll $4, $4, 3 -; MIPS32R6O0-NEXT: ori $7, $zero, 255 -; MIPS32R6O0-NEXT: sllv $7, $7, $4 -; MIPS32R6O0-NEXT: nor $8, $zero, $7 -; MIPS32R6O0-NEXT: andi $9, $5, 255 -; MIPS32R6O0-NEXT: sllv $9, $9, $4 -; MIPS32R6O0-NEXT: andi $6, $6, 255 -; MIPS32R6O0-NEXT: sllv $6, $6, $4 +; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: move $3, $4 +; MIPS32R6O0-NEXT: # kill: def $a2 killed $at +; MIPS32R6O0-NEXT: # kill: def $a1 killed $v0 +; MIPS32R6O0-NEXT: addiu $4, $zero, -4 +; MIPS32R6O0-NEXT: and $4, $3, $4 +; MIPS32R6O0-NEXT: andi $3, $3, 3 +; MIPS32R6O0-NEXT: sll $9, $3, 3 +; MIPS32R6O0-NEXT: ori $3, $zero, 255 +; MIPS32R6O0-NEXT: sllv $5, $3, $9 +; MIPS32R6O0-NEXT: nor $7, $zero, $5 +; MIPS32R6O0-NEXT: andi $2, $2, 255 +; MIPS32R6O0-NEXT: sllv $6, $2, $9 +; MIPS32R6O0-NEXT: andi $1, $1, 255 +; MIPS32R6O0-NEXT: sllv $8, $1, $9 ; MIPS32R6O0-NEXT: $BB13_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $11, 0($3) -; MIPS32R6O0-NEXT: and $12, $11, $7 -; MIPS32R6O0-NEXT: bnec $12, $9, $BB13_3 +; MIPS32R6O0-NEXT: ll $2, 0($4) +; MIPS32R6O0-NEXT: and $3, $2, $5 +; MIPS32R6O0-NEXT: bnec $3, $6, $BB13_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS32R6O0-NEXT: and $11, $11, $8 -; MIPS32R6O0-NEXT: or $11, $11, $6 -; MIPS32R6O0-NEXT: sc $11, 0($3) -; MIPS32R6O0-NEXT: beqzc $11, $BB13_1 +; MIPS32R6O0-NEXT: and $2, $2, $7 +; MIPS32R6O0-NEXT: or $2, $2, $8 +; MIPS32R6O0-NEXT: sc $2, 0($4) +; MIPS32R6O0-NEXT: beqzc $2, $BB13_1 ; MIPS32R6O0-NEXT: 
$BB13_3: # %entry -; MIPS32R6O0-NEXT: srlv $10, $12, $4 -; MIPS32R6O0-NEXT: seb $10, $10 +; MIPS32R6O0-NEXT: srlv $1, $3, $9 +; MIPS32R6O0-NEXT: seb $1, $1 ; MIPS32R6O0-NEXT: # %bb.4: # %entry -; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $10, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: # %entry -; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: xor $1, $1, $2 ; MIPS32R6O0-NEXT: sltiu $2, $1, 1 ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 @@ -5227,40 +5231,41 @@ ; MIPS64R6O0-LABEL: AtomicCmpSwapRes8: ; MIPS64R6O0: # %bb.0: # %entry ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 -; MIPS64R6O0-NEXT: # kill: def $a2 killed $a2 killed $a2_64 -; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; MIPS64R6O0-NEXT: daddiu $1, $zero, -4 -; MIPS64R6O0-NEXT: and $1, $4, $1 -; MIPS64R6O0-NEXT: andi $2, $4, 3 -; MIPS64R6O0-NEXT: xori $2, $2, 3 -; MIPS64R6O0-NEXT: sll $2, $2, 3 +; MIPS64R6O0-NEXT: move $3, $4 +; MIPS64R6O0-NEXT: move $1, $6 +; MIPS64R6O0-NEXT: move $2, $5 +; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 +; MIPS64R6O0-NEXT: and $4, $3, $4 +; MIPS64R6O0-NEXT: andi $3, $3, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 3 +; MIPS64R6O0-NEXT: sll $9, $3, 3 ; MIPS64R6O0-NEXT: ori $3, $zero, 255 -; MIPS64R6O0-NEXT: sllv $3, $3, $2 -; MIPS64R6O0-NEXT: nor $4, $zero, $3 -; MIPS64R6O0-NEXT: andi $7, $5, 255 -; MIPS64R6O0-NEXT: sllv $7, $7, $2 -; MIPS64R6O0-NEXT: andi $6, $6, 255 -; MIPS64R6O0-NEXT: sllv $6, $6, $2 +; MIPS64R6O0-NEXT: sllv $5, $3, $9 +; MIPS64R6O0-NEXT: nor $7, $zero, $5 +; MIPS64R6O0-NEXT: andi $2, $2, 255 +; MIPS64R6O0-NEXT: sllv $6, $2, $9 +; MIPS64R6O0-NEXT: andi $1, $1, 255 +; MIPS64R6O0-NEXT: sllv $8, $1, $9 ; MIPS64R6O0-NEXT: 
.LBB13_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $9, 0($1) -; MIPS64R6O0-NEXT: and $10, $9, $3 -; MIPS64R6O0-NEXT: bnec $10, $7, .LBB13_3 +; MIPS64R6O0-NEXT: ll $2, 0($4) +; MIPS64R6O0-NEXT: and $3, $2, $5 +; MIPS64R6O0-NEXT: bnec $3, $6, .LBB13_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS64R6O0-NEXT: and $9, $9, $4 -; MIPS64R6O0-NEXT: or $9, $9, $6 -; MIPS64R6O0-NEXT: sc $9, 0($1) -; MIPS64R6O0-NEXT: beqzc $9, .LBB13_1 +; MIPS64R6O0-NEXT: and $2, $2, $7 +; MIPS64R6O0-NEXT: or $2, $2, $8 +; MIPS64R6O0-NEXT: sc $2, 0($4) +; MIPS64R6O0-NEXT: beqzc $2, .LBB13_1 ; MIPS64R6O0-NEXT: .LBB13_3: # %entry -; MIPS64R6O0-NEXT: srlv $8, $10, $2 -; MIPS64R6O0-NEXT: seb $8, $8 +; MIPS64R6O0-NEXT: srlv $1, $3, $9 +; MIPS64R6O0-NEXT: seb $1, $1 ; MIPS64R6O0-NEXT: # %bb.4: # %entry -; MIPS64R6O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry -; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload -; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: xor $1, $1, $2 ; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 @@ -5502,34 +5507,34 @@ ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: lw $1, %got(z)($1) ; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $1, $2 +; MIPS32O0-NEXT: and $5, $1, $2 ; MIPS32O0-NEXT: andi $1, $1, 3 -; MIPS32O0-NEXT: sll $1, $1, 3 -; MIPS32O0-NEXT: ori $3, $zero, 65535 -; MIPS32O0-NEXT: sllv $3, $3, $1 -; MIPS32O0-NEXT: nor $5, $zero, $3 -; MIPS32O0-NEXT: sllv $4, $4, $1 +; MIPS32O0-NEXT: sll $9, $1, 3 +; MIPS32O0-NEXT: ori $1, $zero, 65535 +; MIPS32O0-NEXT: sllv $7, $1, $9 +; MIPS32O0-NEXT: nor $8, $zero, $7 +; MIPS32O0-NEXT: sllv $6, $4, 
$9 ; MIPS32O0-NEXT: $BB14_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $7, 0($2) -; MIPS32O0-NEXT: addu $8, $7, $4 -; MIPS32O0-NEXT: and $8, $8, $3 -; MIPS32O0-NEXT: and $9, $7, $5 -; MIPS32O0-NEXT: or $9, $9, $8 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB14_1 +; MIPS32O0-NEXT: ll $2, 0($5) +; MIPS32O0-NEXT: addu $3, $2, $6 +; MIPS32O0-NEXT: and $3, $3, $7 +; MIPS32O0-NEXT: and $4, $2, $8 +; MIPS32O0-NEXT: or $4, $4, $3 +; MIPS32O0-NEXT: sc $4, 0($5) +; MIPS32O0-NEXT: beqz $4, $BB14_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry -; MIPS32O0-NEXT: and $6, $7, $3 -; MIPS32O0-NEXT: srlv $6, $6, $1 -; MIPS32O0-NEXT: sll $6, $6, 16 -; MIPS32O0-NEXT: sra $6, $6, 16 +; MIPS32O0-NEXT: and $1, $2, $7 +; MIPS32O0-NEXT: srlv $1, $1, $9 +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $1, $1, 16 ; MIPS32O0-NEXT: # %bb.3: # %entry -; MIPS32O0-NEXT: sw $6, 4($sp) # 4-byte Folded Spill +; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.4: # %entry ; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 16 -; MIPS32O0-NEXT: sra $2, $2, 16 +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $2, $1, 16 ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -5603,31 +5608,31 @@ ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: addu $1, $2, $25 -; MIPS32R6O0-NEXT: move $2, $4 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a0 ; MIPS32R6O0-NEXT: lw $1, %got(z)($1) -; MIPS32R6O0-NEXT: addiu $3, $zero, -4 -; MIPS32R6O0-NEXT: and $3, $1, $3 +; MIPS32R6O0-NEXT: addiu $2, $zero, -4 +; MIPS32R6O0-NEXT: and $5, $1, $2 ; MIPS32R6O0-NEXT: andi $1, $1, 3 -; MIPS32R6O0-NEXT: sll $1, $1, 3 -; MIPS32R6O0-NEXT: ori $5, $zero, 65535 -; MIPS32R6O0-NEXT: sllv $5, $5, $1 -; MIPS32R6O0-NEXT: nor $6, $zero, $5 -; MIPS32R6O0-NEXT: sllv $4, $4, $1 +; MIPS32R6O0-NEXT: sll $9, $1, 3 +; MIPS32R6O0-NEXT: 
ori $1, $zero, 65535 +; MIPS32R6O0-NEXT: sllv $7, $1, $9 +; MIPS32R6O0-NEXT: nor $8, $zero, $7 +; MIPS32R6O0-NEXT: sllv $6, $4, $9 ; MIPS32R6O0-NEXT: $BB14_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $8, 0($3) -; MIPS32R6O0-NEXT: addu $9, $8, $4 -; MIPS32R6O0-NEXT: and $9, $9, $5 -; MIPS32R6O0-NEXT: and $10, $8, $6 -; MIPS32R6O0-NEXT: or $10, $10, $9 -; MIPS32R6O0-NEXT: sc $10, 0($3) -; MIPS32R6O0-NEXT: beqzc $10, $BB14_1 +; MIPS32R6O0-NEXT: ll $2, 0($5) +; MIPS32R6O0-NEXT: addu $3, $2, $6 +; MIPS32R6O0-NEXT: and $3, $3, $7 +; MIPS32R6O0-NEXT: and $4, $2, $8 +; MIPS32R6O0-NEXT: or $4, $4, $3 +; MIPS32R6O0-NEXT: sc $4, 0($5) +; MIPS32R6O0-NEXT: beqzc $4, $BB14_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry -; MIPS32R6O0-NEXT: and $7, $8, $5 -; MIPS32R6O0-NEXT: srlv $7, $7, $1 -; MIPS32R6O0-NEXT: seh $7, $7 +; MIPS32R6O0-NEXT: and $1, $2, $7 +; MIPS32R6O0-NEXT: srlv $1, $1, $9 +; MIPS32R6O0-NEXT: seh $1, $1 ; MIPS32R6O0-NEXT: # %bb.3: # %entry -; MIPS32R6O0-NEXT: sw $7, 4($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.4: # %entry ; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: seh $2, $1 @@ -5770,33 +5775,33 @@ ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd16))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 -; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) -; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1) -; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 -; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $1, $1, 3 -; MIPS64R6O0-NEXT: xori $1, $1, 2 -; MIPS64R6O0-NEXT: sll $1, $1, 3 -; MIPS64R6O0-NEXT: ori $3, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $3, $3, $1 -; MIPS64R6O0-NEXT: nor $5, $zero, $3 -; MIPS64R6O0-NEXT: sllv $4, $4, $1 +; MIPS64R6O0-NEXT: daddiu $2, $1, %lo(%neg(%gp_rel(AtomicLoadAdd16))) +; MIPS64R6O0-NEXT: move $1, 
$4 +; MIPS64R6O0-NEXT: ld $2, %got_disp(z)($2) +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $5, $2, $3 +; MIPS64R6O0-NEXT: andi $2, $2, 3 +; MIPS64R6O0-NEXT: xori $2, $2, 2 +; MIPS64R6O0-NEXT: sll $9, $2, 3 +; MIPS64R6O0-NEXT: ori $2, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $7, $2, $9 +; MIPS64R6O0-NEXT: nor $8, $zero, $7 +; MIPS64R6O0-NEXT: sllv $6, $1, $9 ; MIPS64R6O0-NEXT: .LBB14_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $7, 0($2) -; MIPS64R6O0-NEXT: addu $8, $7, $4 -; MIPS64R6O0-NEXT: and $8, $8, $3 -; MIPS64R6O0-NEXT: and $9, $7, $5 -; MIPS64R6O0-NEXT: or $9, $9, $8 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB14_1 +; MIPS64R6O0-NEXT: ll $2, 0($5) +; MIPS64R6O0-NEXT: addu $3, $2, $6 +; MIPS64R6O0-NEXT: and $3, $3, $7 +; MIPS64R6O0-NEXT: and $4, $2, $8 +; MIPS64R6O0-NEXT: or $4, $4, $3 +; MIPS64R6O0-NEXT: sc $4, 0($5) +; MIPS64R6O0-NEXT: beqzc $4, .LBB14_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $6, $7, $3 -; MIPS64R6O0-NEXT: srlv $6, $6, $1 -; MIPS64R6O0-NEXT: seh $6, $6 +; MIPS64R6O0-NEXT: and $1, $2, $7 +; MIPS64R6O0-NEXT: srlv $1, $1, $9 +; MIPS64R6O0-NEXT: seh $1, $1 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 @@ -6025,46 +6030,47 @@ ; MIPS32O0: # %bb.0: ; MIPS32O0-NEXT: addiu $sp, $sp, -8 ; MIPS32O0-NEXT: .cfi_def_cfa_offset 8 -; MIPS32O0-NEXT: addu $1, $5, $6 +; MIPS32O0-NEXT: move $1, $7 +; MIPS32O0-NEXT: move $3, $4 +; MIPS32O0-NEXT: addu $2, $5, $6 +; MIPS32O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: addiu $2, $zero, -4 -; MIPS32O0-NEXT: and $2, $4, $2 -; MIPS32O0-NEXT: andi $3, $4, 3 -; MIPS32O0-NEXT: sll $3, $3, 3 -; MIPS32O0-NEXT: ori $4, $zero, 65535 -; MIPS32O0-NEXT: sllv 
$4, $4, $3 -; MIPS32O0-NEXT: nor $5, $zero, $4 -; MIPS32O0-NEXT: andi $6, $1, 65535 -; MIPS32O0-NEXT: sllv $6, $6, $3 -; MIPS32O0-NEXT: andi $7, $7, 65535 -; MIPS32O0-NEXT: sllv $7, $7, $3 +; MIPS32O0-NEXT: addiu $4, $zero, -4 +; MIPS32O0-NEXT: and $4, $3, $4 +; MIPS32O0-NEXT: andi $3, $3, 3 +; MIPS32O0-NEXT: sll $9, $3, 3 +; MIPS32O0-NEXT: ori $3, $zero, 65535 +; MIPS32O0-NEXT: sllv $5, $3, $9 +; MIPS32O0-NEXT: nor $7, $zero, $5 +; MIPS32O0-NEXT: andi $2, $2, 65535 +; MIPS32O0-NEXT: sllv $6, $2, $9 +; MIPS32O0-NEXT: andi $1, $1, 65535 +; MIPS32O0-NEXT: sllv $8, $1, $9 ; MIPS32O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $9, 0($2) -; MIPS32O0-NEXT: and $10, $9, $4 -; MIPS32O0-NEXT: bne $10, $6, $BB15_3 +; MIPS32O0-NEXT: ll $2, 0($4) +; MIPS32O0-NEXT: and $3, $2, $5 +; MIPS32O0-NEXT: bne $3, $6, $BB15_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS32O0-NEXT: and $9, $9, $5 -; MIPS32O0-NEXT: or $9, $9, $7 -; MIPS32O0-NEXT: sc $9, 0($2) -; MIPS32O0-NEXT: beqz $9, $BB15_1 +; MIPS32O0-NEXT: and $2, $2, $7 +; MIPS32O0-NEXT: or $2, $2, $8 +; MIPS32O0-NEXT: sc $2, 0($4) +; MIPS32O0-NEXT: beqz $2, $BB15_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB15_3: -; MIPS32O0-NEXT: srlv $8, $10, $3 -; MIPS32O0-NEXT: sll $8, $8, 16 -; MIPS32O0-NEXT: sra $8, $8, 16 +; MIPS32O0-NEXT: srlv $1, $3, $9 +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: sra $1, $1, 16 ; MIPS32O0-NEXT: # %bb.4: ; MIPS32O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill -; MIPS32O0-NEXT: sw $8, 0($sp) # 4-byte Folded Spill ; MIPS32O0-NEXT: # %bb.5: -; MIPS32O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: sll $2, $1, 16 -; MIPS32O0-NEXT: sra $2, $2, 16 -; MIPS32O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload -; MIPS32O0-NEXT: xor $2, $3, $2 -; MIPS32O0-NEXT: sltiu $3, $2, 1 +; MIPS32O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32O0-NEXT: sll $1, $1, 16 +; MIPS32O0-NEXT: 
sra $1, $1, 16 +; MIPS32O0-NEXT: xor $1, $2, $1 +; MIPS32O0-NEXT: sltiu $3, $1, 1 ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32O0-NEXT: addiu $sp, $sp, 8 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop @@ -6145,44 +6151,45 @@ ; MIPS32R6O0-NEXT: addiu $sp, $sp, -8 ; MIPS32R6O0-NEXT: .cfi_def_cfa_offset 8 ; MIPS32R6O0-NEXT: move $1, $7 -; MIPS32R6O0-NEXT: move $2, $6 -; MIPS32R6O0-NEXT: move $3, $5 -; MIPS32R6O0-NEXT: addu $5, $5, $6 +; MIPS32R6O0-NEXT: move $3, $4 +; MIPS32R6O0-NEXT: # kill: def $a3 killed $at +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a2 +; MIPS32R6O0-NEXT: # kill: def $v0 killed $a1 +; MIPS32R6O0-NEXT: addu $2, $5, $6 +; MIPS32R6O0-NEXT: sw $2, 0($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: addiu $6, $zero, -4 -; MIPS32R6O0-NEXT: and $6, $4, $6 -; MIPS32R6O0-NEXT: andi $4, $4, 3 -; MIPS32R6O0-NEXT: sll $4, $4, 3 -; MIPS32R6O0-NEXT: ori $8, $zero, 65535 -; MIPS32R6O0-NEXT: sllv $8, $8, $4 -; MIPS32R6O0-NEXT: nor $9, $zero, $8 -; MIPS32R6O0-NEXT: andi $10, $5, 65535 -; MIPS32R6O0-NEXT: sllv $10, $10, $4 -; MIPS32R6O0-NEXT: andi $7, $7, 65535 -; MIPS32R6O0-NEXT: sllv $7, $7, $4 +; MIPS32R6O0-NEXT: addiu $4, $zero, -4 +; MIPS32R6O0-NEXT: and $4, $3, $4 +; MIPS32R6O0-NEXT: andi $3, $3, 3 +; MIPS32R6O0-NEXT: sll $9, $3, 3 +; MIPS32R6O0-NEXT: ori $3, $zero, 65535 +; MIPS32R6O0-NEXT: sllv $5, $3, $9 +; MIPS32R6O0-NEXT: nor $7, $zero, $5 +; MIPS32R6O0-NEXT: andi $2, $2, 65535 +; MIPS32R6O0-NEXT: sllv $6, $2, $9 +; MIPS32R6O0-NEXT: andi $1, $1, 65535 +; MIPS32R6O0-NEXT: sllv $8, $1, $9 ; MIPS32R6O0-NEXT: $BB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $12, 0($6) -; MIPS32R6O0-NEXT: and $13, $12, $8 -; MIPS32R6O0-NEXT: bnec $13, $10, $BB15_3 +; MIPS32R6O0-NEXT: ll $2, 0($4) +; MIPS32R6O0-NEXT: and $3, $2, $5 +; MIPS32R6O0-NEXT: bnec $3, $6, $BB15_3 ; MIPS32R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS32R6O0-NEXT: and $12, $12, $9 -; MIPS32R6O0-NEXT: or $12, 
$12, $7 -; MIPS32R6O0-NEXT: sc $12, 0($6) -; MIPS32R6O0-NEXT: beqzc $12, $BB15_1 +; MIPS32R6O0-NEXT: and $2, $2, $7 +; MIPS32R6O0-NEXT: or $2, $2, $8 +; MIPS32R6O0-NEXT: sc $2, 0($4) +; MIPS32R6O0-NEXT: beqzc $2, $BB15_1 ; MIPS32R6O0-NEXT: $BB15_3: -; MIPS32R6O0-NEXT: srlv $11, $13, $4 -; MIPS32R6O0-NEXT: seh $11, $11 +; MIPS32R6O0-NEXT: srlv $1, $3, $9 +; MIPS32R6O0-NEXT: seh $1, $1 ; MIPS32R6O0-NEXT: # %bb.4: -; MIPS32R6O0-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MIPS32R6O0-NEXT: sw $11, 0($sp) # 4-byte Folded Spill +; MIPS32R6O0-NEXT: sw $1, 4($sp) # 4-byte Folded Spill ; MIPS32R6O0-NEXT: # %bb.5: -; MIPS32R6O0-NEXT: lw $1, 4($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: seh $2, $1 -; MIPS32R6O0-NEXT: lw $3, 0($sp) # 4-byte Folded Reload -; MIPS32R6O0-NEXT: xor $2, $3, $2 -; MIPS32R6O0-NEXT: sltiu $3, $2, 1 +; MIPS32R6O0-NEXT: lw $2, 4($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: lw $1, 0($sp) # 4-byte Folded Reload +; MIPS32R6O0-NEXT: seh $1, $1 +; MIPS32R6O0-NEXT: xor $1, $2, $1 +; MIPS32R6O0-NEXT: sltiu $3, $1, 1 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: lw $2, 0($sp) # 4-byte Folded Reload ; MIPS32R6O0-NEXT: addiu $sp, $sp, 8 ; MIPS32R6O0-NEXT: jrc $ra ; @@ -6351,49 +6358,49 @@ ; MIPS64R6O0: # %bb.0: ; MIPS64R6O0-NEXT: daddiu $sp, $sp, -16 ; MIPS64R6O0-NEXT: .cfi_def_cfa_offset 16 -; MIPS64R6O0-NEXT: # kill: def $a3 killed $a3 killed $a3_64 -; MIPS64R6O0-NEXT: sll $1, $7, 0 -; MIPS64R6O0-NEXT: # kill: def $a2 killed $a2 killed $a2_64 -; MIPS64R6O0-NEXT: sll $2, $6, 0 -; MIPS64R6O0-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; MIPS64R6O0-NEXT: sll $3, $5, 0 -; MIPS64R6O0-NEXT: addu $2, $3, $2 +; MIPS64R6O0-NEXT: move $3, $4 +; MIPS64R6O0-NEXT: move $1, $7 +; MIPS64R6O0-NEXT: sll $1, $1, 0 +; MIPS64R6O0-NEXT: move $2, $6 +; MIPS64R6O0-NEXT: sll $4, $2, 0 +; MIPS64R6O0-NEXT: move $2, $5 +; MIPS64R6O0-NEXT: sll $2, $2, 0 +; MIPS64R6O0-NEXT: addu $2, $2, $4 +; MIPS64R6O0-NEXT: sw $2, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: sync -; 
MIPS64R6O0-NEXT: daddiu $3, $zero, -4 -; MIPS64R6O0-NEXT: and $3, $4, $3 -; MIPS64R6O0-NEXT: andi $4, $4, 3 -; MIPS64R6O0-NEXT: xori $4, $4, 2 -; MIPS64R6O0-NEXT: sll $4, $4, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $5, $5, $4 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: andi $7, $2, 65535 -; MIPS64R6O0-NEXT: sllv $7, $7, $4 +; MIPS64R6O0-NEXT: daddiu $4, $zero, -4 +; MIPS64R6O0-NEXT: and $4, $3, $4 +; MIPS64R6O0-NEXT: andi $3, $3, 3 +; MIPS64R6O0-NEXT: xori $3, $3, 2 +; MIPS64R6O0-NEXT: sll $9, $3, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $5, $3, $9 +; MIPS64R6O0-NEXT: nor $7, $zero, $5 +; MIPS64R6O0-NEXT: andi $2, $2, 65535 +; MIPS64R6O0-NEXT: sllv $6, $2, $9 ; MIPS64R6O0-NEXT: andi $1, $1, 65535 -; MIPS64R6O0-NEXT: sllv $1, $1, $4 +; MIPS64R6O0-NEXT: sllv $8, $1, $9 ; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $9, 0($3) -; MIPS64R6O0-NEXT: and $10, $9, $5 -; MIPS64R6O0-NEXT: bnec $10, $7, .LBB15_3 +; MIPS64R6O0-NEXT: ll $2, 0($4) +; MIPS64R6O0-NEXT: and $3, $2, $5 +; MIPS64R6O0-NEXT: bnec $3, $6, .LBB15_3 ; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS64R6O0-NEXT: and $9, $9, $6 -; MIPS64R6O0-NEXT: or $9, $9, $1 -; MIPS64R6O0-NEXT: sc $9, 0($3) -; MIPS64R6O0-NEXT: beqzc $9, .LBB15_1 +; MIPS64R6O0-NEXT: and $2, $2, $7 +; MIPS64R6O0-NEXT: or $2, $2, $8 +; MIPS64R6O0-NEXT: sc $2, 0($4) +; MIPS64R6O0-NEXT: beqzc $2, .LBB15_1 ; MIPS64R6O0-NEXT: .LBB15_3: -; MIPS64R6O0-NEXT: srlv $8, $10, $4 -; MIPS64R6O0-NEXT: seh $8, $8 +; MIPS64R6O0-NEXT: srlv $1, $3, $9 +; MIPS64R6O0-NEXT: seh $1, $1 ; MIPS64R6O0-NEXT: # %bb.4: -; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: -; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload -; MIPS64R6O0-NEXT: seh $2, $1 -; MIPS64R6O0-NEXT: lw $3, 8($sp) # 4-byte 
Folded Reload -; MIPS64R6O0-NEXT: xor $2, $3, $2 -; MIPS64R6O0-NEXT: sltiu $3, $2, 1 +; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload +; MIPS64R6O0-NEXT: seh $1, $1 +; MIPS64R6O0-NEXT: xor $1, $2, $1 +; MIPS64R6O0-NEXT: sltiu $3, $1, 1 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -6620,13 +6627,13 @@ ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: lw $1, %got(countsint)($1) +; MIPS32O0-NEXT: lw $3, %got(countsint)($1) ; MIPS32O0-NEXT: $BB16_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: addu $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB16_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB16_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: sync @@ -6675,13 +6682,13 @@ ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: lw $1, %got(countsint)($1) +; MIPS32R6O0-NEXT: lw $3, %got(countsint)($1) ; MIPS32R6O0-NEXT: $BB16_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: addu $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB16_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB16_1 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: sync ; MIPS32R6O0-NEXT: jrc $ra @@ -6767,13 +6774,13 @@ ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(CheckSync))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: ld $1, 
%got_disp(countsint)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(countsint)($1) ; MIPS64R6O0-NEXT: .LBB16_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: addu $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB16_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: addu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB16_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: sync ; MIPS64R6O0-NEXT: jrc $ra @@ -6918,29 +6925,29 @@ ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: sync -; MIPS32O0-NEXT: lw $1, %got(a)($1) -; MIPS32O0-NEXT: addiu $2, $zero, 0 -; MIPS32O0-NEXT: addiu $3, $zero, 1 -; MIPS32O0-NEXT: move $4, $3 +; MIPS32O0-NEXT: lw $4, %got(a)($1) +; MIPS32O0-NEXT: addiu $6, $zero, 0 +; MIPS32O0-NEXT: addiu $2, $zero, 1 +; MIPS32O0-NEXT: move $5, $2 ; MIPS32O0-NEXT: $BB17_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $5, 0($1) -; MIPS32O0-NEXT: bne $5, $4, $BB17_3 +; MIPS32O0-NEXT: ll $1, 0($4) +; MIPS32O0-NEXT: bne $1, $5, $BB17_3 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: # in Loop: Header=BB17_1 Depth=1 -; MIPS32O0-NEXT: move $6, $2 -; MIPS32O0-NEXT: sc $6, 0($1) -; MIPS32O0-NEXT: beqz $6, $BB17_1 +; MIPS32O0-NEXT: move $3, $6 +; MIPS32O0-NEXT: sc $3, 0($4) +; MIPS32O0-NEXT: beqz $3, $BB17_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: $BB17_3: # %entry -; MIPS32O0-NEXT: xor $1, $5, $3 -; MIPS32O0-NEXT: sltiu $1, $1, 1 +; MIPS32O0-NEXT: xor $2, $1, $2 +; MIPS32O0-NEXT: sltiu $2, $2, 1 ; MIPS32O0-NEXT: sync ; MIPS32O0-NEXT: addiu $2, $zero, 1 -; MIPS32O0-NEXT: xor $2, $5, $2 -; MIPS32O0-NEXT: sltiu $2, $2, 1 -; MIPS32O0-NEXT: andi $2, $2, 1 +; MIPS32O0-NEXT: xor $1, $1, $2 +; MIPS32O0-NEXT: sltiu $1, $1, 1 +; MIPS32O0-NEXT: andi $2, $1, 1 ; MIPS32O0-NEXT: jr $ra ; MIPS32O0-NEXT: nop ; @@ -7001,21 +7008,21 @@ ; 
MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 ; MIPS32R6O0-NEXT: sync -; MIPS32R6O0-NEXT: lw $1, %got(a)($1) -; MIPS32R6O0-NEXT: addiu $2, $zero, 0 -; MIPS32R6O0-NEXT: addiu $3, $zero, 1 -; MIPS32R6O0-NEXT: move $4, $3 +; MIPS32R6O0-NEXT: lw $4, %got(a)($1) +; MIPS32R6O0-NEXT: addiu $6, $zero, 0 +; MIPS32R6O0-NEXT: addiu $2, $zero, 1 +; MIPS32R6O0-NEXT: move $5, $2 ; MIPS32R6O0-NEXT: $BB17_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $5, 0($1) -; MIPS32R6O0-NEXT: bnec $5, $4, $BB17_3 +; MIPS32R6O0-NEXT: ll $1, 0($4) +; MIPS32R6O0-NEXT: bnec $1, $5, $BB17_3 ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 -; MIPS32R6O0-NEXT: move $6, $2 -; MIPS32R6O0-NEXT: sc $6, 0($1) -; MIPS32R6O0-NEXT: beqzc $6, $BB17_1 +; MIPS32R6O0-NEXT: move $3, $6 +; MIPS32R6O0-NEXT: sc $3, 0($4) +; MIPS32R6O0-NEXT: beqzc $3, $BB17_1 ; MIPS32R6O0-NEXT: $BB17_3: # %entry -; MIPS32R6O0-NEXT: xor $1, $5, $3 +; MIPS32R6O0-NEXT: xor $1, $1, $2 ; MIPS32R6O0-NEXT: sltiu $2, $1, 1 ; MIPS32R6O0-NEXT: sync ; MIPS32R6O0-NEXT: jrc $ra @@ -7131,21 +7138,21 @@ ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(zeroreg))) ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: ld $1, %got_disp(a)($1) -; MIPS64R6O0-NEXT: addiu $2, $zero, 0 -; MIPS64R6O0-NEXT: addiu $3, $zero, 1 -; MIPS64R6O0-NEXT: move $4, $3 +; MIPS64R6O0-NEXT: ld $4, %got_disp(a)($1) +; MIPS64R6O0-NEXT: addiu $6, $zero, 0 +; MIPS64R6O0-NEXT: addiu $2, $zero, 1 +; MIPS64R6O0-NEXT: move $5, $2 ; MIPS64R6O0-NEXT: .LBB17_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $5, 0($1) -; MIPS64R6O0-NEXT: bnec $5, $4, .LBB17_3 +; MIPS64R6O0-NEXT: ll $1, 0($4) +; MIPS64R6O0-NEXT: bnec $1, $5, .LBB17_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB17_1 Depth=1 -; MIPS64R6O0-NEXT: move $6, $2 -; MIPS64R6O0-NEXT: sc $6, 0($1) -; 
MIPS64R6O0-NEXT: beqzc $6, .LBB17_1 +; MIPS64R6O0-NEXT: move $3, $6 +; MIPS64R6O0-NEXT: sc $3, 0($4) +; MIPS64R6O0-NEXT: beqzc $3, .LBB17_1 ; MIPS64R6O0-NEXT: .LBB17_3: # %entry -; MIPS64R6O0-NEXT: xor $1, $5, $3 +; MIPS64R6O0-NEXT: xor $1, $1, $2 ; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ; MIPS64R6O0-NEXT: sync ; MIPS64R6O0-NEXT: jrc $ra @@ -7316,13 +7323,13 @@ ; MIPS32O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32O0-NEXT: addu $1, $2, $25 ; MIPS32O0-NEXT: lw $1, %got(x)($1) -; MIPS32O0-NEXT: addiu $1, $1, 1024 +; MIPS32O0-NEXT: addiu $3, $1, 1024 ; MIPS32O0-NEXT: $BB18_1: # %entry ; MIPS32O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32O0-NEXT: ll $2, 0($1) -; MIPS32O0-NEXT: addu $3, $2, $4 -; MIPS32O0-NEXT: sc $3, 0($1) -; MIPS32O0-NEXT: beqz $3, $BB18_1 +; MIPS32O0-NEXT: ll $2, 0($3) +; MIPS32O0-NEXT: addu $1, $2, $4 +; MIPS32O0-NEXT: sc $1, 0($3) +; MIPS32O0-NEXT: beqz $1, $BB18_1 ; MIPS32O0-NEXT: nop ; MIPS32O0-NEXT: # %bb.2: # %entry ; MIPS32O0-NEXT: jr $ra @@ -7369,13 +7376,13 @@ ; MIPS32R6O0-NEXT: addiu $2, $2, %lo(_gp_disp) ; MIPS32R6O0-NEXT: addu $1, $2, $25 ; MIPS32R6O0-NEXT: lw $1, %got(x)($1) -; MIPS32R6O0-NEXT: addiu $1, $1, 1024 +; MIPS32R6O0-NEXT: addiu $3, $1, 1024 ; MIPS32R6O0-NEXT: $BB18_1: # %entry ; MIPS32R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS32R6O0-NEXT: ll $2, 0($1) -; MIPS32R6O0-NEXT: addu $3, $2, $4 -; MIPS32R6O0-NEXT: sc $3, 0($1) -; MIPS32R6O0-NEXT: beqzc $3, $BB18_1 +; MIPS32R6O0-NEXT: ll $2, 0($3) +; MIPS32R6O0-NEXT: addu $1, $2, $4 +; MIPS32R6O0-NEXT: sc $1, 0($3) +; MIPS32R6O0-NEXT: beqzc $1, $BB18_1 ; MIPS32R6O0-NEXT: nop ; MIPS32R6O0-NEXT: # %bb.2: # %entry ; MIPS32R6O0-NEXT: jrc $ra @@ -7458,13 +7465,13 @@ ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd32_OffGt9Bit))) ; MIPS64R6O0-NEXT: # kill: def $a0 killed $a0 killed $a0_64 ; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) -; MIPS64R6O0-NEXT: daddiu $1, $1, 1024 +; MIPS64R6O0-NEXT: daddiu $3, $1, 1024 ; MIPS64R6O0-NEXT: .LBB18_1: # %entry ; 
MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $2, 0($1) -; MIPS64R6O0-NEXT: addu $3, $2, $4 -; MIPS64R6O0-NEXT: sc $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB18_1 +; MIPS64R6O0-NEXT: ll $2, 0($3) +; MIPS64R6O0-NEXT: addu $1, $2, $4 +; MIPS64R6O0-NEXT: sc $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB18_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra Index: llvm/test/CodeGen/Mips/atomic64.ll =================================================================== --- llvm/test/CodeGen/Mips/atomic64.ll +++ llvm/test/CodeGen/Mips/atomic64.ll @@ -95,13 +95,13 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAdd))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAdd))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB0_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: daddu $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB0_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: daddu $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB0_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -252,13 +252,13 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadSub))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadSub))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB1_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: dsubu $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB1_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: dsubu $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, 
.LBB1_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -409,13 +409,13 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadAnd))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadAnd))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB2_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: and $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB2_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB2_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -566,13 +566,13 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadOr))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadOr))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB3_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: or $3, $2, $4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB3_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: or $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB3_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -723,13 +723,13 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadXor))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadXor))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB4_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: xor $3, $2, 
$4 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB4_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: xor $1, $2, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB4_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -884,14 +884,14 @@ ; MIPS64R6O0-NEXT: lui $1, %hi(%neg(%gp_rel(AtomicLoadNand))) ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicLoadNand))) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB5_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $2, 0($1) -; MIPS64R6O0-NEXT: and $3, $2, $4 -; MIPS64R6O0-NEXT: nor $3, $zero, $3 -; MIPS64R6O0-NEXT: scd $3, 0($1) -; MIPS64R6O0-NEXT: beqzc $3, .LBB5_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: and $1, $2, $4 +; MIPS64R6O0-NEXT: nor $1, $zero, $1 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB5_1 ; MIPS64R6O0-NEXT: nop ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: jrc $ra @@ -1057,16 +1057,15 @@ ; MIPS64R6O0-NEXT: daddu $1, $1, $25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicSwap64))) ; MIPS64R6O0-NEXT: sd $4, 8($sp) -; MIPS64R6O0-NEXT: ld $2, 8($sp) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $4, 8($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB6_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $3, 0($1) -; MIPS64R6O0-NEXT: move $4, $2 -; MIPS64R6O0-NEXT: scd $4, 0($1) -; MIPS64R6O0-NEXT: beqzc $4, .LBB6_1 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: move $1, $4 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB6_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; @@ -1253,19 +1252,18 @@ ; MIPS64R6O0-NEXT: daddu $1, $1, 
$25 ; MIPS64R6O0-NEXT: daddiu $1, $1, %lo(%neg(%gp_rel(AtomicCmpSwap64))) ; MIPS64R6O0-NEXT: sd $5, 8($sp) -; MIPS64R6O0-NEXT: ld $2, 8($sp) -; MIPS64R6O0-NEXT: ld $1, %got_disp(x)($1) +; MIPS64R6O0-NEXT: ld $5, 8($sp) +; MIPS64R6O0-NEXT: ld $3, %got_disp(x)($1) ; MIPS64R6O0-NEXT: .LBB7_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: lld $3, 0($1) -; MIPS64R6O0-NEXT: bnec $3, $4, .LBB7_3 +; MIPS64R6O0-NEXT: lld $2, 0($3) +; MIPS64R6O0-NEXT: bnec $2, $4, .LBB7_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB7_1 Depth=1 -; MIPS64R6O0-NEXT: move $5, $2 -; MIPS64R6O0-NEXT: scd $5, 0($1) -; MIPS64R6O0-NEXT: beqzc $5, .LBB7_1 +; MIPS64R6O0-NEXT: move $1, $5 +; MIPS64R6O0-NEXT: scd $1, 0($3) +; MIPS64R6O0-NEXT: beqzc $1, .LBB7_1 ; MIPS64R6O0-NEXT: .LBB7_3: # %entry -; MIPS64R6O0-NEXT: move $2, $3 ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 ; MIPS64R6O0-NEXT: jrc $ra ; Index: llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll =================================================================== --- llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll +++ llvm/test/CodeGen/Mips/atomicCmpSwapPW.ll @@ -12,18 +12,18 @@ ; O32-LABEL: foo: ; O32: # %bb.0: # %entry ; O32-NEXT: lui $1, %hi(sym) -; O32-NEXT: lw $1, %lo(sym)($1) +; O32-NEXT: lw $3, %lo(sym)($1) ; O32-NEXT: sync ; O32-NEXT: $BB0_1: # %entry ; O32-NEXT: # =>This Inner Loop Header: Depth=1 -; O32-NEXT: ll $2, 0($1) -; O32-NEXT: bne $2, $4, $BB0_3 +; O32-NEXT: ll $1, 0($3) +; O32-NEXT: bne $1, $4, $BB0_3 ; O32-NEXT: nop ; O32-NEXT: # %bb.2: # %entry ; O32-NEXT: # in Loop: Header=BB0_1 Depth=1 -; O32-NEXT: move $3, $5 -; O32-NEXT: sc $3, 0($1) -; O32-NEXT: beqz $3, $BB0_1 +; O32-NEXT: move $2, $5 +; O32-NEXT: sc $2, 0($3) +; O32-NEXT: beqz $2, $BB0_1 ; O32-NEXT: nop ; O32-NEXT: $BB0_3: # %entry ; O32-NEXT: sync @@ -32,23 +32,23 @@ ; ; N32-LABEL: foo: ; N32: # %bb.0: # %entry -; N32-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; N32-NEXT: sll $1, $5, 0 -; N32-NEXT: # kill: def 
$a0 killed $a0 killed $a0_64 -; N32-NEXT: sll $2, $4, 0 -; N32-NEXT: lui $3, %hi(sym) -; N32-NEXT: lw $3, %lo(sym)($3) +; N32-NEXT: move $1, $5 +; N32-NEXT: sll $5, $1, 0 +; N32-NEXT: move $1, $4 +; N32-NEXT: sll $4, $1, 0 +; N32-NEXT: lui $1, %hi(sym) +; N32-NEXT: lw $3, %lo(sym)($1) ; N32-NEXT: sync ; N32-NEXT: .LBB0_1: # %entry ; N32-NEXT: # =>This Inner Loop Header: Depth=1 -; N32-NEXT: ll $4, 0($3) -; N32-NEXT: bne $4, $2, .LBB0_3 +; N32-NEXT: ll $1, 0($3) +; N32-NEXT: bne $1, $4, .LBB0_3 ; N32-NEXT: nop ; N32-NEXT: # %bb.2: # %entry ; N32-NEXT: # in Loop: Header=BB0_1 Depth=1 -; N32-NEXT: move $5, $1 -; N32-NEXT: sc $5, 0($3) -; N32-NEXT: beqz $5, .LBB0_1 +; N32-NEXT: move $2, $5 +; N32-NEXT: sc $2, 0($3) +; N32-NEXT: beqz $2, .LBB0_1 ; N32-NEXT: nop ; N32-NEXT: .LBB0_3: # %entry ; N32-NEXT: sync @@ -57,27 +57,27 @@ ; ; N64-LABEL: foo: ; N64: # %bb.0: # %entry -; N64-NEXT: # kill: def $a1 killed $a1 killed $a1_64 -; N64-NEXT: sll $1, $5, 0 -; N64-NEXT: # kill: def $a0 killed $a0 killed $a0_64 -; N64-NEXT: sll $2, $4, 0 -; N64-NEXT: lui $3, %highest(sym) -; N64-NEXT: daddiu $3, $3, %higher(sym) -; N64-NEXT: dsll $3, $3, 16 -; N64-NEXT: daddiu $3, $3, %hi(sym) -; N64-NEXT: dsll $3, $3, 16 -; N64-NEXT: ld $3, %lo(sym)($3) +; N64-NEXT: move $1, $5 +; N64-NEXT: sll $5, $1, 0 +; N64-NEXT: move $1, $4 +; N64-NEXT: sll $4, $1, 0 +; N64-NEXT: lui $1, %highest(sym) +; N64-NEXT: daddiu $1, $1, %higher(sym) +; N64-NEXT: dsll $1, $1, 16 +; N64-NEXT: daddiu $1, $1, %hi(sym) +; N64-NEXT: dsll $1, $1, 16 +; N64-NEXT: ld $3, %lo(sym)($1) ; N64-NEXT: sync ; N64-NEXT: .LBB0_1: # %entry ; N64-NEXT: # =>This Inner Loop Header: Depth=1 -; N64-NEXT: ll $4, 0($3) -; N64-NEXT: bne $4, $2, .LBB0_3 +; N64-NEXT: ll $1, 0($3) +; N64-NEXT: bne $1, $4, .LBB0_3 ; N64-NEXT: nop ; N64-NEXT: # %bb.2: # %entry ; N64-NEXT: # in Loop: Header=BB0_1 Depth=1 -; N64-NEXT: move $5, $1 -; N64-NEXT: sc $5, 0($3) -; N64-NEXT: beqz $5, .LBB0_1 +; N64-NEXT: move $2, $5 +; N64-NEXT: sc $2, 0($3) +; 
N64-NEXT: beqz $2, .LBB0_1 ; N64-NEXT: nop ; N64-NEXT: .LBB0_3: # %entry ; N64-NEXT: sync Index: llvm/test/CodeGen/Mips/copy-fp64.ll =================================================================== --- llvm/test/CodeGen/Mips/copy-fp64.ll +++ llvm/test/CodeGen/Mips/copy-fp64.ll @@ -11,8 +11,8 @@ ; CHECK: successors: %bb.1(0x80000000) ; CHECK: liveins: $d12_64, $t9, $v0 ; CHECK: renamable $at = ADDu killed $v0, killed $t9 + ; CHECK: renamable $d6_64 = COPY killed $d12_64 ; CHECK: ADJCALLSTACKDOWN 16, 0, implicit-def $sp, implicit $sp - ; CHECK: $d6_64 = COPY killed renamable $d12_64 ; CHECK: renamable $t9 = LW killed renamable $at, target-flags(mips-got) @bar ; CHECK: dead $ra = JALR killed $t9, csr_o32_fp64, target-flags(mips-jalr) , implicit-def dead $ra, implicit killed $d6_64, implicit-def $d0_64 ; CHECK: ADJCALLSTACKUP 16, 0, implicit-def $sp, implicit $sp Index: llvm/test/CodeGen/Mips/implicit-sret.ll =================================================================== --- llvm/test/CodeGen/Mips/implicit-sret.ll +++ llvm/test/CodeGen/Mips/implicit-sret.ll @@ -20,9 +20,8 @@ ; CHECK-NEXT: ld $5, 16($sp) ; CHECK-NEXT: ld $7, 32($sp) ; CHECK-NEXT: lw $1, 8($sp) -; CHECK-NEXT: # implicit-def: $v0_64 -; CHECK-NEXT: move $2, $1 -; CHECK-NEXT: move $4, $2 +; CHECK-NEXT: # implicit-def: $a0_64 +; CHECK-NEXT: move $4, $1 ; CHECK-NEXT: jal use_sret ; CHECK-NEXT: nop ; CHECK-NEXT: ld $ra, 40($sp) # 8-byte Folded Reload @@ -41,15 +40,15 @@ define internal { i32, i128, i64 } @implicit_sret_impl() unnamed_addr nounwind { ; CHECK-LABEL: implicit_sret_impl: ; CHECK: # %bb.0: -; CHECK-NEXT: move $1, $4 -; CHECK-NEXT: daddiu $2, $zero, 20 -; CHECK-NEXT: sd $2, 16($4) -; CHECK-NEXT: daddiu $2, $zero, 0 +; CHECK-NEXT: # kill: def $at_64 killed $a0_64 +; CHECK-NEXT: daddiu $1, $zero, 20 +; CHECK-NEXT: sd $1, 16($4) +; CHECK-NEXT: daddiu $1, $zero, 0 ; CHECK-NEXT: sd $zero, 8($4) -; CHECK-NEXT: daddiu $3, $zero, 30 -; CHECK-NEXT: sd $3, 24($4) -; CHECK-NEXT: addiu $3, $zero, 10 -; 
CHECK-NEXT: sw $3, 0($4) +; CHECK-NEXT: daddiu $1, $zero, 30 +; CHECK-NEXT: sd $1, 24($4) +; CHECK-NEXT: addiu $1, $zero, 10 +; CHECK-NEXT: sw $1, 0($4) ; CHECK-NEXT: jr $ra ; CHECK-NEXT: nop ret { i32, i128, i64 } { i32 10, i128 20, i64 30 } @@ -70,12 +69,10 @@ ; CHECK-NEXT: lw $3, 4($sp) ; CHECK-NEXT: # implicit-def: $a0_64 ; CHECK-NEXT: move $4, $3 -; CHECK-NEXT: # implicit-def: $v1_64 -; CHECK-NEXT: move $3, $2 -; CHECK-NEXT: # implicit-def: $v0_64 -; CHECK-NEXT: move $2, $1 -; CHECK-NEXT: move $5, $3 -; CHECK-NEXT: move $6, $2 +; CHECK-NEXT: # implicit-def: $a1_64 +; CHECK-NEXT: move $5, $2 +; CHECK-NEXT: # implicit-def: $a2_64 +; CHECK-NEXT: move $6, $1 ; CHECK-NEXT: jal use_sret2 ; CHECK-NEXT: nop ; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload @@ -95,19 +92,19 @@ define internal { i32, i32, i32, i32, i32, i32 } @implicit_sret_impl2() unnamed_addr nounwind { ; CHECK-LABEL: implicit_sret_impl2: ; CHECK: # %bb.0: -; CHECK-NEXT: move $1, $4 -; CHECK-NEXT: addiu $2, $zero, 6 -; CHECK-NEXT: sw $2, 20($4) -; CHECK-NEXT: addiu $2, $zero, 5 -; CHECK-NEXT: sw $2, 16($4) -; CHECK-NEXT: addiu $2, $zero, 4 -; CHECK-NEXT: sw $2, 12($4) -; CHECK-NEXT: addiu $2, $zero, 3 -; CHECK-NEXT: sw $2, 8($4) -; CHECK-NEXT: addiu $2, $zero, 2 -; CHECK-NEXT: sw $2, 4($4) -; CHECK-NEXT: addiu $2, $zero, 1 -; CHECK-NEXT: sw $2, 0($4) +; CHECK-NEXT: # kill: def $at_64 killed $a0_64 +; CHECK-NEXT: addiu $1, $zero, 6 +; CHECK-NEXT: sw $1, 20($4) +; CHECK-NEXT: addiu $1, $zero, 5 +; CHECK-NEXT: sw $1, 16($4) +; CHECK-NEXT: addiu $1, $zero, 4 +; CHECK-NEXT: sw $1, 12($4) +; CHECK-NEXT: addiu $1, $zero, 3 +; CHECK-NEXT: sw $1, 8($4) +; CHECK-NEXT: addiu $1, $zero, 2 +; CHECK-NEXT: sw $1, 4($4) +; CHECK-NEXT: addiu $1, $zero, 1 +; CHECK-NEXT: sw $1, 0($4) ; CHECK-NEXT: jr $ra ; CHECK-NEXT: nop ret { i32, i32, i32, i32, i32, i32 } { i32 1, i32 2, i32 3, i32 4, i32 5, i32 6 } Index: llvm/test/CodeGen/Mips/micromips-eva.mir =================================================================== 
--- llvm/test/CodeGen/Mips/micromips-eva.mir +++ llvm/test/CodeGen/Mips/micromips-eva.mir @@ -196,19 +196,19 @@ ... -# CHECK: 60 41 60 05 lbue $2, 5($1) -# CHECK: 60 41 68 05 lbe $2, 5($1) -# CHECK: 60 41 a8 03 sbe $2, 3($1) +# CHECK: 60 22 60 05 lbue $1, 5($2) +# CHECK: 60 22 68 05 lbe $1, 5($2) +# CHECK: 60 22 a8 03 sbe $1, 3($2) -# CHECK: 60 41 62 0a lhue $2, 10($1) -# CHECK: 60 41 6a 0a lhe $2, 10($1) -# CHECK: 60 41 aa 06 she $2, 6($1) +# CHECK: 60 22 62 0a lhue $1, 10($2) +# CHECK: 60 22 6a 0a lhe $1, 10($2) +# CHECK: 60 22 aa 06 she $1, 6($2) -# CHECK: 60 41 6e 14 lwe $2, 20($1) -# CHECK: 60 41 ae 0c swe $2, 12($1) +# CHECK: 60 22 6e 14 lwe $1, 20($2) +# CHECK: 60 22 ae 0c swe $1, 12($2) -# CHECK: 60 41 6c 00 lle $2, 0($1) -# CHECK: 60 81 ac 00 sce $4, 0($1) +# CHECK: 60 22 6c 00 lle $1, 0($2) +# CHECK: 60 22 ac 00 sce $1, 0($2) # CHECK: 60 41 a6 05 cachee 2, 5($1) # CHECK: 60 41 a4 05 prefe 2, 5($1) Index: llvm/test/CodeGen/Mips/msa/ldr_str.ll =================================================================== --- llvm/test/CodeGen/Mips/msa/ldr_str.ll +++ llvm/test/CodeGen/Mips/msa/ldr_str.ll @@ -11,47 +11,47 @@ define void @llvm_mips_ldr_d_test(<2 x i64>* %val, i8* %ptr) nounwind { ; MIPS32R5-EB-LABEL: llvm_mips_ldr_d_test: ; MIPS32R5-EB: # %bb.0: # %entry -; MIPS32R5-EB-NEXT: # implicit-def: $at -; MIPS32R5-EB-NEXT: lwr $1, 23($5) -; MIPS32R5-EB-NEXT: lwl $1, 20($5) ; MIPS32R5-EB-NEXT: # implicit-def: $v0 -; MIPS32R5-EB-NEXT: lwr $2, 19($5) -; MIPS32R5-EB-NEXT: lwl $2, 16($5) -; MIPS32R5-EB-NEXT: fill.w $w0, $1 -; MIPS32R5-EB-NEXT: insert.w $w0[1], $2 +; MIPS32R5-EB-NEXT: lwr $2, 23($5) +; MIPS32R5-EB-NEXT: lwl $2, 20($5) +; MIPS32R5-EB-NEXT: # implicit-def: $at +; MIPS32R5-EB-NEXT: lwr $1, 19($5) +; MIPS32R5-EB-NEXT: lwl $1, 16($5) +; MIPS32R5-EB-NEXT: fill.w $w0, $2 +; MIPS32R5-EB-NEXT: insert.w $w0[1], $1 ; MIPS32R5-EB-NEXT: st.d $w0, 0($4) ; MIPS32R5-EB-NEXT: jr $ra ; MIPS32R5-EB-NEXT: nop ; ; MIPS32R5-EL-LABEL: llvm_mips_ldr_d_test: ; MIPS32R5-EL: # 
%bb.0: # %entry -; MIPS32R5-EL-NEXT: # implicit-def: $at -; MIPS32R5-EL-NEXT: lwr $1, 16($5) -; MIPS32R5-EL-NEXT: lwl $1, 19($5) ; MIPS32R5-EL-NEXT: # implicit-def: $v0 -; MIPS32R5-EL-NEXT: lwr $2, 20($5) -; MIPS32R5-EL-NEXT: lwl $2, 23($5) -; MIPS32R5-EL-NEXT: fill.w $w0, $1 -; MIPS32R5-EL-NEXT: insert.w $w0[1], $2 +; MIPS32R5-EL-NEXT: lwr $2, 16($5) +; MIPS32R5-EL-NEXT: lwl $2, 19($5) +; MIPS32R5-EL-NEXT: # implicit-def: $at +; MIPS32R5-EL-NEXT: lwr $1, 20($5) +; MIPS32R5-EL-NEXT: lwl $1, 23($5) +; MIPS32R5-EL-NEXT: fill.w $w0, $2 +; MIPS32R5-EL-NEXT: insert.w $w0[1], $1 ; MIPS32R5-EL-NEXT: st.d $w0, 0($4) ; MIPS32R5-EL-NEXT: jr $ra ; MIPS32R5-EL-NEXT: nop ; ; MIPS32R6-EB-LABEL: llvm_mips_ldr_d_test: ; MIPS32R6-EB: # %bb.0: # %entry -; MIPS32R6-EB-NEXT: lw $1, 20($5) -; MIPS32R6-EB-NEXT: lw $2, 16($5) -; MIPS32R6-EB-NEXT: fill.w $w0, $1 -; MIPS32R6-EB-NEXT: insert.w $w0[1], $2 +; MIPS32R6-EB-NEXT: lw $2, 20($5) +; MIPS32R6-EB-NEXT: lw $1, 16($5) +; MIPS32R6-EB-NEXT: fill.w $w0, $2 +; MIPS32R6-EB-NEXT: insert.w $w0[1], $1 ; MIPS32R6-EB-NEXT: st.d $w0, 0($4) ; MIPS32R6-EB-NEXT: jrc $ra ; ; MIPS32R6-EL-LABEL: llvm_mips_ldr_d_test: ; MIPS32R6-EL: # %bb.0: # %entry -; MIPS32R6-EL-NEXT: lw $1, 16($5) -; MIPS32R6-EL-NEXT: lw $2, 20($5) -; MIPS32R6-EL-NEXT: fill.w $w0, $1 -; MIPS32R6-EL-NEXT: insert.w $w0[1], $2 +; MIPS32R6-EL-NEXT: lw $2, 16($5) +; MIPS32R6-EL-NEXT: lw $1, 20($5) +; MIPS32R6-EL-NEXT: fill.w $w0, $2 +; MIPS32R6-EL-NEXT: insert.w $w0[1], $1 ; MIPS32R6-EL-NEXT: st.d $w0, 0($4) ; MIPS32R6-EL-NEXT: jrc $ra ; @@ -122,43 +122,43 @@ ; MIPS32R5-EB-LABEL: llvm_mips_str_d_test: ; MIPS32R5-EB: # %bb.0: # %entry ; MIPS32R5-EB-NEXT: ld.d $w0, 0($4) -; MIPS32R5-EB-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5-EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5-EB-NEXT: swr $1, 19($5) -; MIPS32R5-EB-NEXT: swl $1, 16($5) -; MIPS32R5-EB-NEXT: swr $2, 23($5) -; MIPS32R5-EB-NEXT: swl $2, 20($5) +; MIPS32R5-EB-NEXT: copy_s.w $2, $w0[0] +; MIPS32R5-EB-NEXT: copy_s.w $1, $w0[1] +; 
MIPS32R5-EB-NEXT: swr $2, 19($5) +; MIPS32R5-EB-NEXT: swl $2, 16($5) +; MIPS32R5-EB-NEXT: swr $1, 23($5) +; MIPS32R5-EB-NEXT: swl $1, 20($5) ; MIPS32R5-EB-NEXT: jr $ra ; MIPS32R5-EB-NEXT: nop ; ; MIPS32R5-EL-LABEL: llvm_mips_str_d_test: ; MIPS32R5-EL: # %bb.0: # %entry ; MIPS32R5-EL-NEXT: ld.d $w0, 0($4) -; MIPS32R5-EL-NEXT: copy_s.w $1, $w0[0] -; MIPS32R5-EL-NEXT: copy_s.w $2, $w0[1] -; MIPS32R5-EL-NEXT: swr $1, 16($5) -; MIPS32R5-EL-NEXT: swl $1, 19($5) -; MIPS32R5-EL-NEXT: swr $2, 20($5) -; MIPS32R5-EL-NEXT: swl $2, 23($5) +; MIPS32R5-EL-NEXT: copy_s.w $2, $w0[0] +; MIPS32R5-EL-NEXT: copy_s.w $1, $w0[1] +; MIPS32R5-EL-NEXT: swr $2, 16($5) +; MIPS32R5-EL-NEXT: swl $2, 19($5) +; MIPS32R5-EL-NEXT: swr $1, 20($5) +; MIPS32R5-EL-NEXT: swl $1, 23($5) ; MIPS32R5-EL-NEXT: jr $ra ; MIPS32R5-EL-NEXT: nop ; ; MIPS32R6-EB-LABEL: llvm_mips_str_d_test: ; MIPS32R6-EB: # %bb.0: # %entry ; MIPS32R6-EB-NEXT: ld.d $w0, 0($4) -; MIPS32R6-EB-NEXT: copy_s.w $1, $w0[0] -; MIPS32R6-EB-NEXT: copy_s.w $2, $w0[1] -; MIPS32R6-EB-NEXT: sw $1, 20($5) -; MIPS32R6-EB-NEXT: sw $2, 16($5) +; MIPS32R6-EB-NEXT: copy_s.w $2, $w0[0] +; MIPS32R6-EB-NEXT: copy_s.w $1, $w0[1] +; MIPS32R6-EB-NEXT: sw $2, 20($5) +; MIPS32R6-EB-NEXT: sw $1, 16($5) ; MIPS32R6-EB-NEXT: jrc $ra ; ; MIPS32R6-EL-LABEL: llvm_mips_str_d_test: ; MIPS32R6-EL: # %bb.0: # %entry ; MIPS32R6-EL-NEXT: ld.d $w0, 0($4) -; MIPS32R6-EL-NEXT: copy_s.w $1, $w0[0] -; MIPS32R6-EL-NEXT: copy_s.w $2, $w0[1] -; MIPS32R6-EL-NEXT: sw $1, 16($5) -; MIPS32R6-EL-NEXT: sw $2, 20($5) +; MIPS32R6-EL-NEXT: copy_s.w $2, $w0[0] +; MIPS32R6-EL-NEXT: copy_s.w $1, $w0[1] +; MIPS32R6-EL-NEXT: sw $2, 16($5) +; MIPS32R6-EL-NEXT: sw $1, 20($5) ; MIPS32R6-EL-NEXT: jrc $ra ; ; MIPS64R6-LABEL: llvm_mips_str_d_test: Index: llvm/test/CodeGen/PowerPC/addegluecrash.ll =================================================================== --- llvm/test/CodeGen/PowerPC/addegluecrash.ll +++ llvm/test/CodeGen/PowerPC/addegluecrash.ll @@ -6,27 +6,30 @@ define void 
@bn_mul_comba8(i64* nocapture %r, i64* nocapture readonly %a, i64* nocapture readonly %b) { ; CHECK-LABEL: bn_mul_comba8: ; CHECK: # %bb.0: -; CHECK-NEXT: ld 6, 0(4) -; CHECK-NEXT: ld 7, 0(5) -; CHECK-NEXT: mulhdu 8, 7, 6 -; CHECK-NEXT: ld 4, 8(4) -; CHECK-NEXT: mulld 9, 4, 6 -; CHECK-NEXT: mulhdu 4, 4, 6 -; CHECK-NEXT: addc 6, 9, 8 -; CHECK-NEXT: addze 4, 4 +; CHECK-NEXT: std 4, -8(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: ld 3, -8(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 9, 0(3) +; CHECK-NEXT: ld 8, 0(5) +; CHECK-NEXT: mulhdu 7, 8, 9 +; CHECK-NEXT: ld 3, 8(3) +; CHECK-NEXT: mulld 6, 3, 9 +; CHECK-NEXT: mulhdu 3, 3, 9 +; CHECK-NEXT: addc 6, 6, 7 +; CHECK-NEXT: addze 3, 3 ; CHECK-NEXT: ld 5, 8(5) -; CHECK-NEXT: mulld 8, 5, 7 -; CHECK-NEXT: mulhdu 5, 5, 7 -; CHECK-NEXT: addc 6, 6, 8 +; CHECK-NEXT: mulld 7, 5, 8 +; CHECK-NEXT: mulhdu 5, 5, 8 +; CHECK-NEXT: addc 6, 6, 7 ; CHECK-NEXT: addze 5, 5 -; CHECK-NEXT: add 4, 5, 4 -; CHECK-NEXT: cmpld 7, 4, 5 -; CHECK-NEXT: mfocrf 4, 1 -; CHECK-NEXT: rlwinm 4, 4, 29, 31, 31 -; CHECK-NEXT: # implicit-def: $x5 -; CHECK-NEXT: mr 5, 4 -; CHECK-NEXT: clrldi 4, 5, 32 -; CHECK-NEXT: std 4, 0(3) +; CHECK-NEXT: add 3, 5, 3 +; CHECK-NEXT: cmpld 7, 3, 5 +; CHECK-NEXT: mfocrf 3, 1 +; CHECK-NEXT: rlwinm 5, 3, 29, 31, 31 +; CHECK-NEXT: # implicit-def: $x3 +; CHECK-NEXT: mr 3, 5 +; CHECK-NEXT: clrldi 3, 3, 32 +; CHECK-NEXT: std 3, 0(4) ; CHECK-NEXT: blr %1 = load i64, i64* %a, align 8 %conv = zext i64 %1 to i128 Index: llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll =================================================================== --- llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll +++ llvm/test/CodeGen/PowerPC/aggressive-anti-dep-breaker-subreg.ll @@ -9,7 +9,7 @@ lnext: %elementArray = load i32*, i32** %elementArrayPtr, align 8 -; CHECK: lwz [[LDREG:[0-9]+]], 124(1) # 4-byte Folded Reload +; CHECK: lwz [[LDREG:[0-9]+]], 140(1) # 4-byte Folded Reload ; CHECK: # implicit-def: 
$x[[TEMPREG:[0-9]+]] %element = load i32, i32* %elementArray, align 4 ; CHECK: mr [[TEMPREG]], [[LDREG]] Index: llvm/test/CodeGen/PowerPC/aix-overflow-toc.py =================================================================== --- llvm/test/CodeGen/PowerPC/aix-overflow-toc.py +++ llvm/test/CodeGen/PowerPC/aix-overflow-toc.py @@ -26,41 +26,41 @@ print("}") # 32-bit assembly check -# ASM32: lwz 3, L..C0(2) -# ASM32: lwz 3, L..C1(2) +# ASM32: lwz 4, L..C0(2) +# ASM32: lwz 4, L..C1(2) -# ASM32: lwz 3, L..C8191(2) -# ASM32: lwz 3, L..C8192-65536(2) -# ASM32: lwz 3, L..C8193-65536(2) +# ASM32: lwz 4, L..C8191(2) +# ASM32: lwz 4, L..C8192-65536(2) +# ASM32: lwz 4, L..C8193-65536(2) -# ASM32: lwz 3, L..C12288-65536(2) -# ASM32: lwz 3, L..C12289-65536(2) +# ASM32: lwz 4, L..C12288-65536(2) +# ASM32: lwz 4, L..C12289-65536(2) # 64-bit assembly check -# ASM64: ld 3, L..C0(2) -# ASM64: ld 3, L..C1(2) +# ASM64: ld 4, L..C0(2) +# ASM64: ld 4, L..C1(2) -# ASM64: ld 3, L..C4095(2) -# ASM64: ld 3, L..C4096-65536(2) -# ASM64: ld 3, L..C4097-65536(2) +# ASM64: ld 4, L..C4095(2) +# ASM64: ld 4, L..C4096-65536(2) +# ASM64: ld 4, L..C4097-65536(2) -# ASM64: ld 3, L..C12287-65536(2) -# ASM64: ld 3, L..C12288-131072(2) -# ASM64: ld 3, L..C12289-131072(2) +# ASM64: ld 4, L..C12287-65536(2) +# ASM64: ld 4, L..C12288-131072(2) +# ASM64: ld 4, L..C12289-131072(2) -# DIS32: 0: 80 62 00 00 lwz 3, 0(2) +# DIS32: 0: 80 82 00 00 lwz 4, 0(2) # DIS32: 00000002: R_TOC (idx: 24590) a0[TC] -# DIS32: c: 80 62 00 04 lwz 3, 4(2) +# DIS32: c: 80 82 00 04 lwz 4, 4(2) # DIS32: 0000000e: R_TOC (idx: 24592) a1[TC] -# DIS32: fffc: 80 62 7f fc lwz 3, 32764(2) +# DIS32: fffc: 80 82 7f fc lwz 4, 32764(2) # DIS32: 0000fffe: R_TOC (idx: 40972) a8191[TC] -# DIS32: 10004: 80 62 80 00 lwz 3, -32768(2) +# DIS32: 10004: 80 82 80 00 lwz 4, -32768(2) # DIS32: 00010006: R_TOC (idx: 40974) a8192[TC] -# DIS32: 1000c: 80 62 80 04 lwz 3, -32764(2) +# DIS32: 1000c: 80 82 80 04 lwz 4, -32764(2) # DIS32: 0001000e: R_TOC (idx: 
40976) a8193[TC] -# DIS32: 18004: 80 62 c0 00 lwz 3, -16384(2) +# DIS32: 18004: 80 82 c0 00 lwz 4, -16384(2) # DIS32: 00018006: R_TOC (idx: 49166) a12288[TC] -# DIS32: 1800c: 80 62 c0 04 lwz 3, -16380(2) +# DIS32: 1800c: 80 82 c0 04 lwz 4, -16380(2) # DIS32: 0001800e: R_TOC (idx: 49168) a12289[TC] Index: llvm/test/CodeGen/PowerPC/anon_aggr.ll =================================================================== --- llvm/test/CodeGen/PowerPC/anon_aggr.ll +++ llvm/test/CodeGen/PowerPC/anon_aggr.ll @@ -19,9 +19,9 @@ } ; CHECK-LABEL: func1: -; CHECK: cmpld {{([0-9]+,)?}}4, 5 ; CHECK-DAG: std 3, -[[OFFSET1:[0-9]+]] ; CHECK-DAG: std 5, -[[OFFSET2:[0-9]+]] +; CHECK: cmpld {{([0-9]+,)?}}4, 5 ; CHECK: ld 3, -[[OFFSET1]](1) ; CHECK: ld 3, -[[OFFSET2]](1) @@ -38,13 +38,13 @@ ret i8* %array2_ptr } ; CHECK-LABEL: func2: -; CHECK-DAG: cmpld {{([0-9]+,)?}}4, 5 +; CHECK-DAG: cmpld {{([0-9]+,)?}}4, 3 ; CHECK-DAG: std 6, 72(1) ; CHECK-DAG: std 5, 64(1) -; CHECK-DAG: std 5, -[[OFFSET1:[0-9]+]] +; CHECK-DAG: std 3, -[[OFFSET1:[0-9]+]] ; CHECK-DAG: std 3, -[[OFFSET2:[0-9]+]] -; CHECK: ld 3, -[[OFFSET2]](1) ; CHECK: ld 3, -[[OFFSET1]](1) +; CHECK: ld 3, -[[OFFSET2]](1) define i8* @func3({ i64, i8* }* byval %array1, %tarray* byval %array2) { entry: @@ -85,9 +85,9 @@ ; CHECK-LABEL: func4: ; CHECK-DAG: ld [[REG2:[0-9]+]], 120(1) ; CHECK-DAG: ld [[REG3:[0-9]+]], 136(1) -; CHECK-DAG: cmpld {{([0-9]+,)?}}[[REG2]], [[REG3]] -; CHECK: std [[REG2]], -[[OFFSET1:[0-9]+]](1) +; CHECK-DAG: std [[REG2]], -[[OFFSET1:[0-9]+]](1) ; CHECK: std [[REG3]], -[[OFFSET2:[0-9]+]](1) +; CHECK: cmpld {{([0-9]+,)?}}[[REG2]], [[REG3]] ; CHECK: ld 3, -[[OFFSET1]](1) ; CHECK: ld 3, -[[OFFSET2]](1) Index: llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll =================================================================== --- llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll +++ llvm/test/CodeGen/PowerPC/builtins-ppc-p10vsx.ll @@ -248,8 +248,7 @@ ; ; CHECK-O0-LABEL: vec_xl_zext: ; CHECK-O0: # %bb.0: # %entry -; 
CHECK-O0-NEXT: lxvrbx vs0, r4, r3 -; CHECK-O0-NEXT: xxlor v2, vs0, vs0 +; CHECK-O0-NEXT: lxvrbx v2, r4, r3 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %__pointer, i64 %__offset @@ -269,8 +268,7 @@ ; CHECK-O0-LABEL: vec_xl_zext_short: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 1 -; CHECK-O0-NEXT: lxvrhx vs0, r4, r3 -; CHECK-O0-NEXT: xxlor v2, vs0, vs0 +; CHECK-O0-NEXT: lxvrhx v2, r4, r3 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i16, i16* %__pointer, i64 %__offset @@ -290,8 +288,7 @@ ; CHECK-O0-LABEL: vec_xl_zext_word: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 2 -; CHECK-O0-NEXT: lxvrwx vs0, r4, r3 -; CHECK-O0-NEXT: xxlor v2, vs0, vs0 +; CHECK-O0-NEXT: lxvrwx v2, r4, r3 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %__pointer, i64 %__offset @@ -311,8 +308,7 @@ ; CHECK-O0-LABEL: vec_xl_zext_dw: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 3 -; CHECK-O0-NEXT: lxvrdx vs0, r4, r3 -; CHECK-O0-NEXT: xxlor v2, vs0, vs0 +; CHECK-O0-NEXT: lxvrdx v2, r4, r3 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i64, i64* %__pointer, i64 %__offset @@ -334,9 +330,9 @@ ; CHECK-O0-LABEL: vec_xl_sext_b: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: lbzx r3, r4, r3 -; CHECK-O0-NEXT: extsb r3, r3 -; CHECK-O0-NEXT: sradi r4, r3, 63 -; CHECK-O0-NEXT: mtvsrdd v2, r4, r3 +; CHECK-O0-NEXT: extsb r4, r3 +; CHECK-O0-NEXT: sradi r3, r4, 63 +; CHECK-O0-NEXT: mtvsrdd v2, r3, r4 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i8, i8* %p, i64 %offset @@ -358,9 +354,9 @@ ; CHECK-O0-LABEL: vec_xl_sext_h: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 1 -; CHECK-O0-NEXT: lhax r3, r4, r3 -; CHECK-O0-NEXT: sradi r4, r3, 63 -; CHECK-O0-NEXT: mtvsrdd v2, r4, r3 +; CHECK-O0-NEXT: lhax r4, r4, r3 +; CHECK-O0-NEXT: sradi r3, r4, 63 +; CHECK-O0-NEXT: mtvsrdd v2, r3, r4 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i16, i16* %p, i64 
%offset @@ -382,9 +378,9 @@ ; CHECK-O0-LABEL: vec_xl_sext_w: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 2 -; CHECK-O0-NEXT: lwax r3, r4, r3 -; CHECK-O0-NEXT: sradi r4, r3, 63 -; CHECK-O0-NEXT: mtvsrdd v2, r4, r3 +; CHECK-O0-NEXT: lwax r4, r4, r3 +; CHECK-O0-NEXT: sradi r3, r4, 63 +; CHECK-O0-NEXT: mtvsrdd v2, r3, r4 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i32, i32* %p, i64 %offset @@ -406,9 +402,9 @@ ; CHECK-O0-LABEL: vec_xl_sext_d: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: sldi r3, r3, 3 -; CHECK-O0-NEXT: ldx r3, r4, r3 -; CHECK-O0-NEXT: sradi r4, r3, 63 -; CHECK-O0-NEXT: mtvsrdd v2, r4, r3 +; CHECK-O0-NEXT: ldx r4, r4, r3 +; CHECK-O0-NEXT: sradi r3, r4, 63 +; CHECK-O0-NEXT: mtvsrdd v2, r3, r4 ; CHECK-O0-NEXT: blr entry: %add.ptr = getelementptr inbounds i64, i64* %p, i64 %offset Index: llvm/test/CodeGen/PowerPC/elf-common.ll =================================================================== --- llvm/test/CodeGen/PowerPC/elf-common.ll +++ llvm/test/CodeGen/PowerPC/elf-common.ll @@ -6,7 +6,7 @@ ; RUN: llc -relocation-model=pic -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ ; RUN: -mcpu=pwr8 < %s | FileCheck -check-prefix=PIC %s -; Test correct code generation for static and pic for loading and storing a common symbol +; Test correct code generation for static and pic for loading and storing a common symbol @comm_glob = common global i32 0, align 4 @@ -14,11 +14,11 @@ ; NOOPT-LABEL: test_comm: ; NOOPT: # %bb.0: # %entry ; NOOPT-NEXT: addis 3, 2, comm_glob@toc@ha -; NOOPT-NEXT: addi 3, 3, comm_glob@toc@l -; NOOPT-NEXT: lwz 4, 0(3) -; NOOPT-NEXT: addi 5, 4, 1 -; NOOPT-NEXT: stw 5, 0(3) -; NOOPT-NEXT: extsw 3, 4 +; NOOPT-NEXT: addi 5, 3, comm_glob@toc@l +; NOOPT-NEXT: lwz 3, 0(5) +; NOOPT-NEXT: addi 4, 3, 1 +; NOOPT-NEXT: stw 4, 0(5) +; NOOPT-NEXT: extsw 3, 3 ; NOOPT-NEXT: blr ; ; STATIC-LABEL: test_comm: Index: llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll 
=================================================================== --- llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll +++ llvm/test/CodeGen/PowerPC/fast-isel-pcrel.ll @@ -22,12 +22,12 @@ ; CHECK-NEXT: paddi r3, 0, .L.str@PCREL, 1 ; CHECK-NEXT: bl printf@notoc ; CHECK-NEXT: ld r4, 104(r1) -; CHECK-NEXT: lis r5, 16403 -; CHECK-NEXT: ori r5, r5, 62914 -; CHECK-NEXT: sldi r5, r5, 32 -; CHECK-NEXT: oris r5, r5, 36700 -; CHECK-NEXT: ori r5, r5, 10486 -; CHECK-NEXT: std r5, 0(r4) +; CHECK-NEXT: lis r3, 16403 +; CHECK-NEXT: ori r3, r3, 62914 +; CHECK-NEXT: sldi r3, r3, 32 +; CHECK-NEXT: oris r3, r3, 36700 +; CHECK-NEXT: ori r3, r3, 10486 +; CHECK-NEXT: std r3, 0(r4) ; CHECK-NEXT: addi r1, r1, 112 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 @@ -50,16 +50,14 @@ ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r0, 16(r1) -; CHECK-NEXT: stdu r1, -112(r1) +; CHECK-NEXT: stdu r1, -96(r1) ; CHECK-NEXT: paddi r3, 0, .L.str.1@PCREL, 1 ; CHECK-NEXT: bl printf@notoc -; CHECK-NEXT: pld r4, stdout@got@pcrel(0), 1 -; CHECK-NEXT: ld r4, 0(r4) -; CHECK-NEXT: li r5, 97 -; CHECK-NEXT: std r3, 104(r1) # 8-byte Folded Spill -; CHECK-NEXT: mr r3, r5 +; CHECK-NEXT: pld r3, stdout@got@pcrel(0), 1 +; CHECK-NEXT: ld r4, 0(r3) +; CHECK-NEXT: li r3, 97 ; CHECK-NEXT: bl _IO_putc@notoc -; CHECK-NEXT: addi r1, r1, 112 +; CHECK-NEXT: addi r1, r1, 96 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 ; CHECK-NEXT: blr Index: llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll =================================================================== --- llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll +++ llvm/test/CodeGen/PowerPC/fp-int128-fp-combine.ll @@ -29,8 +29,7 @@ define float @f_i128_fi_nsz(float %v) #0 { ; CHECK-LABEL: f_i128_fi_nsz: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xsrdpiz 0, 1 -; CHECK-NEXT: fmr 1, 0 +; CHECK-NEXT: xsrdpiz 1, 1 ; CHECK-NEXT: blr entry: %a = fptosi float %v to i128 Index: llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll 
=================================================================== --- llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll +++ llvm/test/CodeGen/PowerPC/fp-strict-fcmp-noopt.ll @@ -12,17 +12,17 @@ ; CHECK-NEXT: mfocrf r4, 1 ; CHECK-NEXT: rlwinm r4, r4, 31, 31, 31 ; CHECK-NEXT: xori r4, r4, 1 -; CHECK-NEXT: and r3, r3, r4 +; CHECK-NEXT: and r4, r3, r4 ; CHECK-NEXT: xscmpudp cr7, f1, f3 -; CHECK-NEXT: mfocrf r4, 1 -; CHECK-NEXT: rlwinm r4, r4, 31, 31, 31 -; CHECK-NEXT: xori r4, r4, 1 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: xscmpudp cr7, f1, f3 ; CHECK-NEXT: mfocrf r5, 1 ; CHECK-NEXT: rlwinm r5, r5, 31, 31, 31 ; CHECK-NEXT: xori r5, r5, 1 -; CHECK-NEXT: and r4, r4, r5 -; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: and r3, r3, r5 +; CHECK-NEXT: or r3, r3, r4 ; CHECK-NEXT: # kill: def $r4 killed $r3 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr @@ -42,23 +42,21 @@ ; CHECK-NEXT: xscmpudp cr7, f2, f4 ; CHECK-NEXT: mfocrf r4, 1 ; CHECK-NEXT: rlwinm r4, r4, 30, 31, 31 -; CHECK-NEXT: and r3, r3, r4 -; CHECK-NEXT: xscmpudp cr7, f1, f3 +; CHECK-NEXT: and r4, r3, r4 ; CHECK-NEXT: xscmpudp cr0, f1, f3 -; CHECK-NEXT: mfocrf r4, 1 -; CHECK-NEXT: rotlwi r4, r4, 28 -; CHECK-NEXT: stw r4, -4(r1) -; CHECK-NEXT: mcrf cr7, cr0 -; CHECK-NEXT: mfocrf r4, 1 -; CHECK-NEXT: rlwinm r4, r4, 30, 31, 31 +; CHECK-NEXT: mfocrf r3, 128 +; CHECK-NEXT: stw r3, -4(r1) +; CHECK-NEXT: xscmpudp cr7, f1, f3 +; CHECK-NEXT: mfocrf r3, 1 ; CHECK-NEXT: lwz r5, -4(r1) ; CHECK-NEXT: rotlwi r5, r5, 4 ; CHECK-NEXT: mtocrf 1, r5 -; CHECK-NEXT: mfocrf r5, 1 -; CHECK-NEXT: rlwinm r5, r5, 31, 31, 31 -; CHECK-NEXT: xori r5, r5, 1 -; CHECK-NEXT: and r4, r5, r4 -; CHECK-NEXT: or r3, r4, r3 +; CHECK-NEXT: rlwinm r5, r3, 30, 31, 31 +; CHECK-NEXT: mfocrf r3, 1 +; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 +; CHECK-NEXT: xori r3, r3, 1 +; CHECK-NEXT: and r3, r3, r5 +; CHECK-NEXT: or r3, r3, r4 ; CHECK-NEXT: # kill: def $r4 killed $r3 ; CHECK-NEXT: clrldi 
r3, r3, 32 ; CHECK-NEXT: blr @@ -74,9 +72,8 @@ ; CHECK-NEXT: xscmpuqp cr7, v2, v3 ; CHECK-NEXT: mfocrf r3, 1 ; CHECK-NEXT: rlwinm r3, r3, 31, 31, 31 -; CHECK-NEXT: xori r3, r3, 1 -; CHECK-NEXT: # implicit-def: $x4 -; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: xori r4, r3, 1 +; CHECK-NEXT: # implicit-def: $x3 ; CHECK-NEXT: mr r3, r4 ; CHECK-NEXT: blr entry: Index: llvm/test/CodeGen/PowerPC/fp64-to-int16.ll =================================================================== --- llvm/test/CodeGen/PowerPC/fp64-to-int16.ll +++ llvm/test/CodeGen/PowerPC/fp64-to-int16.ll @@ -9,9 +9,8 @@ ; CHECK-NEXT: mffprwz 3, 0 ; CHECK-NEXT: xori 3, 3, 65534 ; CHECK-NEXT: cntlzw 3, 3 -; CHECK-NEXT: srwi 3, 3, 5 -; CHECK-NEXT: # implicit-def: $x4 -; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: srwi 4, 3, 5 +; CHECK-NEXT: # implicit-def: $x3 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr entry: Index: llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll =================================================================== --- llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll +++ llvm/test/CodeGen/PowerPC/p9-vinsert-vextract.ll @@ -145,19 +145,37 @@ } define <8 x i16> @shuffle_vector_halfword_8_1(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_8_1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 6 -; CHECK-NEXT: vinserth 3, 2, 14 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_8_1: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-OPT-NEXT: vinserth 3, 2, 14 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_8_1: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 12 -; CHECK-BE-NEXT: vinserth 3, 2, 0 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_8_1: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded 
Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-O0-NEXT: vinserth 2, 3, 14 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_8_1: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 0 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_8_1: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 0 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins @@ -166,131 +184,255 @@ ; The following testcases take one halfword element from the first vector and ; inserts it at various locations in the second vector define <8 x i16> @shuffle_vector_halfword_9_7(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_9_7: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 10 -; CHECK-NEXT: vinserth 3, 2, 12 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_9_7: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-OPT-NEXT: vinserth 3, 2, 12 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_9_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 8 -; CHECK-BE-NEXT: vinserth 3, 2, 2 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_9_7: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-O0-NEXT: vinserth 2, 3, 12 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_9_7: +; CHECK-BE-OPT: # %bb.0: # %entry +; 
CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 2 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_9_7: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 2 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_10_4(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_10_4: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinserth 3, 2, 10 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_10_4: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vinserth 3, 2, 10 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_10_4: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 2 -; CHECK-BE-NEXT: vinserth 3, 2, 4 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_10_4: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vinserth 2, 3, 10 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_10_4: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 4 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_10_4: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 4 +; 
CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_11_2(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_11_2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 4 -; CHECK-NEXT: vinserth 3, 2, 8 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_11_2: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-OPT-NEXT: vinserth 3, 2, 8 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_11_2: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 14 -; CHECK-BE-NEXT: vinserth 3, 2, 6 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_11_2: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-O0-NEXT: vinserth 2, 3, 8 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_11_2: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 6 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_11_2: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 6 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_12_6(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_12_6: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 12 -; CHECK-NEXT: vinserth 3, 2, 6 -; CHECK-NEXT: vmr 2, 
3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_12_6: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-OPT-NEXT: vinserth 3, 2, 6 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_12_6: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 6 -; CHECK-BE-NEXT: vinserth 3, 2, 8 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_12_6: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-O0-NEXT: vinserth 2, 3, 6 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_12_6: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 8 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_12_6: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 8 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_13_3(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_13_3: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 2 -; CHECK-NEXT: vinserth 3, 2, 4 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_13_3: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-OPT-NEXT: vinserth 3, 2, 4 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_13_3: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinserth 3, 2, 10 -; 
CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_13_3: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-O0-NEXT: vinserth 2, 3, 4 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_13_3: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 10 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_13_3: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vinserth 2, 3, 10 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_14_5(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_14_5: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 14 -; CHECK-NEXT: vinserth 3, 2, 2 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_14_5: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-OPT-NEXT: vinserth 3, 2, 2 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_14_5: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 4 -; CHECK-BE-NEXT: vinserth 3, 2, 12 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_14_5: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-O0-NEXT: vinserth 2, 3, 2 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_14_5: +; 
CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 12 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_14_5: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 12 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins } define <8 x i16> @shuffle_vector_halfword_15_0(<8 x i16> %a, <8 x i16> %b) { -; CHECK-LABEL: shuffle_vector_halfword_15_0: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 8 -; CHECK-NEXT: vinserth 3, 2, 0 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_halfword_15_0: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-OPT-NEXT: vinserth 3, 2, 0 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_halfword_15_0: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 10 -; CHECK-BE-NEXT: vinserth 3, 2, 14 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_halfword_15_0: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-O0-NEXT: vinserth 2, 3, 0 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_halfword_15_0: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-BE-OPT-NEXT: vinserth 3, 2, 14 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_halfword_15_0: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; 
CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-BE-O0-NEXT: vinserth 2, 3, 14 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> ret <8 x i16> %vecins @@ -718,302 +860,588 @@ ; The following testcases take one byte element from the first vector and ; inserts it at various locations in the second vector define <16 x i8> @shuffle_vector_byte_16_8(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_16_8: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vinsertb 3, 2, 15 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_16_8: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vinsertb 3, 2, 15 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_16_8: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 1 -; CHECK-BE-NEXT: vinsertb 3, 2, 0 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_16_8: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vinsertb 2, 3, 15 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_16_8: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 1 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 0 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_16_8: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 1 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 0 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_17_1(<16 x i8> %a, <16 x i8> %b) { -; 
CHECK-LABEL: shuffle_vector_byte_17_1: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 7 -; CHECK-NEXT: vinsertb 3, 2, 14 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_17_1: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 7 +; CHECK-OPT-NEXT: vinsertb 3, 2, 14 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_17_1: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 10 -; CHECK-BE-NEXT: vinsertb 3, 2, 1 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_17_1: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 7 +; CHECK-O0-NEXT: vinsertb 2, 3, 14 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_17_1: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 1 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_17_1: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 1 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_18_10(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_18_10: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 14 -; CHECK-NEXT: vinsertb 3, 2, 13 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_18_10: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-OPT-NEXT: vinsertb 3, 2, 13 +; CHECK-OPT-NEXT: vmr 2, 3 +; 
CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_18_10: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 3 -; CHECK-BE-NEXT: vinsertb 3, 2, 2 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_18_10: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-O0-NEXT: vinsertb 2, 3, 13 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_18_10: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 3 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 2 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_18_10: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 3 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 2 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_19_3(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_19_3: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 5 -; CHECK-NEXT: vinsertb 3, 2, 12 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_19_3: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 5 +; CHECK-OPT-NEXT: vinsertb 3, 2, 12 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_19_3: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 12 -; CHECK-BE-NEXT: vinsertb 3, 2, 3 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_19_3: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 
2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 5 +; CHECK-O0-NEXT: vinsertb 2, 3, 12 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_19_3: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 3 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_19_3: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 3 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_20_12(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_20_12: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 12 -; CHECK-NEXT: vinsertb 3, 2, 11 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_20_12: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 12 +; CHECK-OPT-NEXT: vinsertb 3, 2, 11 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_20_12: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 5 -; CHECK-BE-NEXT: vinsertb 3, 2, 4 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_20_12: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 12 +; CHECK-O0-NEXT: vinsertb 2, 3, 11 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_20_12: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 5 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 4 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; 
CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_20_12: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 5 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 4 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_21_5(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_21_5: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 3 -; CHECK-NEXT: vinsertb 3, 2, 10 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_21_5: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 3 +; CHECK-OPT-NEXT: vinsertb 3, 2, 10 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_21_5: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 14 -; CHECK-BE-NEXT: vinsertb 3, 2, 5 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_21_5: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 3 +; CHECK-O0-NEXT: vinsertb 2, 3, 10 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_21_5: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 14 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 5 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_21_5: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 14 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 5 +; CHECK-BE-O0-NEXT: blr entry: %vecins = 
shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_22_14(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_22_14: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 10 -; CHECK-NEXT: vinsertb 3, 2, 9 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_22_14: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 10 +; CHECK-OPT-NEXT: vinsertb 3, 2, 9 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_22_14: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 7 -; CHECK-BE-NEXT: vinsertb 3, 2, 6 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_22_14: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 10 +; CHECK-O0-NEXT: vinsertb 2, 3, 9 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_22_14: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 7 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 6 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_22_14: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 7 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 6 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_23_7(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_23_7: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 1 -; CHECK-NEXT: vinsertb 3, 2, 8 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_23_7: 
+; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 1 +; CHECK-OPT-NEXT: vinsertb 3, 2, 8 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_23_7: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vinsertb 3, 2, 7 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_23_7: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 1 +; CHECK-O0-NEXT: vinsertb 2, 3, 8 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_23_7: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 7 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_23_7: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 7 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_24_0(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_24_0: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 8 -; CHECK-NEXT: vinsertb 3, 2, 7 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_24_0: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-OPT-NEXT: vinsertb 3, 2, 7 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_24_0: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 9 -; CHECK-BE-NEXT: vinsertb 3, 2, 8 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_24_0: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; 
CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-O0-NEXT: vinsertb 2, 3, 7 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_24_0: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 9 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 8 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_24_0: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 9 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 8 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_25_9(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_25_9: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 15 -; CHECK-NEXT: vinsertb 3, 2, 6 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_25_9: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 15 +; CHECK-OPT-NEXT: vinsertb 3, 2, 6 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_25_9: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 2 -; CHECK-BE-NEXT: vinsertb 3, 2, 9 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_25_9: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 15 +; CHECK-O0-NEXT: vinsertb 2, 3, 6 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_25_9: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 9 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; 
CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_25_9: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 9 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_26_2(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_26_2: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 6 -; CHECK-NEXT: vinsertb 3, 2, 5 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_26_2: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-OPT-NEXT: vinsertb 3, 2, 5 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_26_2: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 11 -; CHECK-BE-NEXT: vinsertb 3, 2, 10 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_26_2: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-O0-NEXT: vinsertb 2, 3, 5 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_26_2: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 11 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 10 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_26_2: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 11 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 10 +; CHECK-BE-O0-NEXT: blr entry: %vecins = 
shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_27_11(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_27_11: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 13 -; CHECK-NEXT: vinsertb 3, 2, 4 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_27_11: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 13 +; CHECK-OPT-NEXT: vinsertb 3, 2, 4 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_27_11: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 4 -; CHECK-BE-NEXT: vinsertb 3, 2, 11 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_27_11: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 13 +; CHECK-O0-NEXT: vinsertb 2, 3, 4 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_27_11: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 11 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_27_11: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 11 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_28_4(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_28_4: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 4 -; CHECK-NEXT: vinsertb 3, 2, 3 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: 
shuffle_vector_byte_28_4: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 4 +; CHECK-OPT-NEXT: vinsertb 3, 2, 3 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_28_4: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 13 -; CHECK-BE-NEXT: vinsertb 3, 2, 12 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_28_4: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 4 +; CHECK-O0-NEXT: vinsertb 2, 3, 3 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_28_4: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 13 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 12 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_28_4: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 13 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 12 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_29_13(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_29_13: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 11 -; CHECK-NEXT: vinsertb 3, 2, 2 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_29_13: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 11 +; CHECK-OPT-NEXT: vinsertb 3, 2, 2 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_29_13: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 6 -; CHECK-BE-NEXT: vinsertb 3, 2, 13 -; CHECK-BE-NEXT: vmr 2, 3 -; 
CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_29_13: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 11 +; CHECK-O0-NEXT: vinsertb 2, 3, 2 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_29_13: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 6 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 13 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_29_13: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 6 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 13 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_30_6(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_30_6: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 2 -; CHECK-NEXT: vinsertb 3, 2, 1 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_30_6: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 2 +; CHECK-OPT-NEXT: vinsertb 3, 2, 1 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_30_6: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 15 -; CHECK-BE-NEXT: vinsertb 3, 2, 14 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_30_6: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 2 +; CHECK-O0-NEXT: vinsertb 2, 3, 1 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: 
shuffle_vector_byte_30_6: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 15 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 14 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_30_6: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 +; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 15 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 14 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins } define <16 x i8> @shuffle_vector_byte_31_15(<16 x i8> %a, <16 x i8> %b) { -; CHECK-LABEL: shuffle_vector_byte_31_15: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsldoi 2, 2, 2, 9 -; CHECK-NEXT: vinsertb 3, 2, 0 -; CHECK-NEXT: vmr 2, 3 -; CHECK-NEXT: blr +; CHECK-OPT-LABEL: shuffle_vector_byte_31_15: +; CHECK-OPT: # %bb.0: # %entry +; CHECK-OPT-NEXT: vsldoi 2, 2, 2, 9 +; CHECK-OPT-NEXT: vinsertb 3, 2, 0 +; CHECK-OPT-NEXT: vmr 2, 3 +; CHECK-OPT-NEXT: blr ; -; CHECK-BE-LABEL: shuffle_vector_byte_31_15: -; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: vsldoi 2, 2, 2, 8 -; CHECK-BE-NEXT: vinsertb 3, 2, 15 -; CHECK-BE-NEXT: vmr 2, 3 -; CHECK-BE-NEXT: blr +; CHECK-O0-LABEL: shuffle_vector_byte_31_15: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-O0-NEXT: vmr 3, 2 +; CHECK-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-O0-NEXT: vsldoi 3, 3, 3, 9 +; CHECK-O0-NEXT: vinsertb 2, 3, 0 +; CHECK-O0-NEXT: blr +; +; CHECK-BE-OPT-LABEL: shuffle_vector_byte_31_15: +; CHECK-BE-OPT: # %bb.0: # %entry +; CHECK-BE-OPT-NEXT: vsldoi 2, 2, 2, 8 +; CHECK-BE-OPT-NEXT: vinsertb 3, 2, 15 +; CHECK-BE-OPT-NEXT: vmr 2, 3 +; CHECK-BE-OPT-NEXT: blr +; +; CHECK-BE-O0-LABEL: shuffle_vector_byte_31_15: +; CHECK-BE-O0: # %bb.0: # %entry +; CHECK-BE-O0-NEXT: stxv 35, -16(1) # 16-byte Folded Spill +; CHECK-BE-O0-NEXT: vmr 3, 2 
+; CHECK-BE-O0-NEXT: lxv 34, -16(1) # 16-byte Folded Reload +; CHECK-BE-O0-NEXT: vsldoi 3, 3, 3, 8 +; CHECK-BE-O0-NEXT: vinsertb 2, 3, 15 +; CHECK-BE-O0-NEXT: blr entry: %vecins = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ret <16 x i8> %vecins @@ -1321,8 +1749,8 @@ ; ; CHECK-O0-LABEL: insert_halfword_0: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 14 ; CHECK-O0-NEXT: blr @@ -1335,8 +1763,8 @@ ; ; CHECK-BE-O0-LABEL: insert_halfword_0: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 0 ; CHECK-BE-O0-NEXT: blr @@ -1354,8 +1782,8 @@ ; ; CHECK-O0-LABEL: insert_halfword_1: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 12 ; CHECK-O0-NEXT: blr @@ -1368,8 +1796,8 @@ ; ; CHECK-BE-O0-LABEL: insert_halfword_1: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 2 ; CHECK-BE-O0-NEXT: blr @@ -1387,8 +1815,8 @@ ; ; CHECK-O0-LABEL: insert_halfword_2: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 10 ; CHECK-O0-NEXT: blr @@ -1401,8 +1829,8 @@ ; ; CHECK-BE-O0-LABEL: 
insert_halfword_2: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 4 ; CHECK-BE-O0-NEXT: blr @@ -1420,8 +1848,8 @@ ; ; CHECK-O0-LABEL: insert_halfword_3: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 8 ; CHECK-O0-NEXT: blr @@ -1434,8 +1862,8 @@ ; ; CHECK-BE-O0-LABEL: insert_halfword_3: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 6 ; CHECK-BE-O0-NEXT: blr @@ -1453,8 +1881,8 @@ ; ; CHECK-O0-LABEL: insert_halfword_4: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 6 ; CHECK-O0-NEXT: blr @@ -1467,8 +1895,8 @@ ; ; CHECK-BE-O0-LABEL: insert_halfword_4: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 8 ; CHECK-BE-O0-NEXT: blr @@ -1486,8 +1914,8 @@ ; ; CHECK-O0-LABEL: insert_halfword_5: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 4 ; CHECK-O0-NEXT: blr @@ 
-1500,8 +1928,8 @@ ; ; CHECK-BE-O0-LABEL: insert_halfword_5: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 10 ; CHECK-BE-O0-NEXT: blr @@ -1519,8 +1947,8 @@ ; ; CHECK-O0-LABEL: insert_halfword_6: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 2 ; CHECK-O0-NEXT: blr @@ -1533,8 +1961,8 @@ ; ; CHECK-BE-O0-LABEL: insert_halfword_6: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 12 ; CHECK-BE-O0-NEXT: blr @@ -1552,8 +1980,8 @@ ; ; CHECK-O0-LABEL: insert_halfword_7: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinserth 2, 3, 0 ; CHECK-O0-NEXT: blr @@ -1566,8 +1994,8 @@ ; ; CHECK-BE-O0-LABEL: insert_halfword_7: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinserth 2, 3, 14 ; CHECK-BE-O0-NEXT: blr @@ -1587,8 +2015,8 @@ ; ; CHECK-O0-LABEL: insert_byte_0: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: 
vinsertb 2, 3, 15 ; CHECK-O0-NEXT: blr @@ -1601,8 +2029,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_0: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 0 ; CHECK-BE-O0-NEXT: blr @@ -1620,8 +2048,8 @@ ; ; CHECK-O0-LABEL: insert_byte_1: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 14 ; CHECK-O0-NEXT: blr @@ -1634,8 +2062,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_1: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 1 ; CHECK-BE-O0-NEXT: blr @@ -1653,8 +2081,8 @@ ; ; CHECK-O0-LABEL: insert_byte_2: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 13 ; CHECK-O0-NEXT: blr @@ -1667,8 +2095,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_2: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 2 ; CHECK-BE-O0-NEXT: blr @@ -1686,8 +2114,8 @@ ; ; CHECK-O0-LABEL: insert_byte_3: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 
; CHECK-O0-NEXT: vinsertb 2, 3, 12 ; CHECK-O0-NEXT: blr @@ -1700,8 +2128,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_3: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 3 ; CHECK-BE-O0-NEXT: blr @@ -1719,8 +2147,8 @@ ; ; CHECK-O0-LABEL: insert_byte_4: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 11 ; CHECK-O0-NEXT: blr @@ -1733,8 +2161,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_4: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 4 ; CHECK-BE-O0-NEXT: blr @@ -1752,8 +2180,8 @@ ; ; CHECK-O0-LABEL: insert_byte_5: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 10 ; CHECK-O0-NEXT: blr @@ -1766,8 +2194,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_5: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 5 ; CHECK-BE-O0-NEXT: blr @@ -1785,8 +2213,8 @@ ; ; CHECK-O0-LABEL: insert_byte_6: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: 
xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 9 ; CHECK-O0-NEXT: blr @@ -1799,8 +2227,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_6: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 6 ; CHECK-BE-O0-NEXT: blr @@ -1818,8 +2246,8 @@ ; ; CHECK-O0-LABEL: insert_byte_7: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 8 ; CHECK-O0-NEXT: blr @@ -1832,8 +2260,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_7: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 7 ; CHECK-BE-O0-NEXT: blr @@ -1851,8 +2279,8 @@ ; ; CHECK-O0-LABEL: insert_byte_8: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 7 ; CHECK-O0-NEXT: blr @@ -1865,8 +2293,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_8: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 8 ; CHECK-BE-O0-NEXT: blr @@ -1884,8 +2312,8 @@ ; ; CHECK-O0-LABEL: insert_byte_9: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; 
CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 6 ; CHECK-O0-NEXT: blr @@ -1898,8 +2326,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_9: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 9 ; CHECK-BE-O0-NEXT: blr @@ -1917,8 +2345,8 @@ ; ; CHECK-O0-LABEL: insert_byte_10: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 5 ; CHECK-O0-NEXT: blr @@ -1931,8 +2359,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_10: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 10 ; CHECK-BE-O0-NEXT: blr @@ -1950,8 +2378,8 @@ ; ; CHECK-O0-LABEL: insert_byte_11: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 4 ; CHECK-O0-NEXT: blr @@ -1964,8 +2392,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_11: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 11 ; CHECK-BE-O0-NEXT: blr @@ -1983,8 +2411,8 @@ ; ; CHECK-O0-LABEL: insert_byte_12: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; 
CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 3 ; CHECK-O0-NEXT: blr @@ -1997,8 +2425,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_12: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 12 ; CHECK-BE-O0-NEXT: blr @@ -2016,8 +2444,8 @@ ; ; CHECK-O0-LABEL: insert_byte_13: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 2 ; CHECK-O0-NEXT: blr @@ -2030,8 +2458,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_13: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 13 ; CHECK-BE-O0-NEXT: blr @@ -2049,8 +2477,8 @@ ; ; CHECK-O0-LABEL: insert_byte_14: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 1 ; CHECK-O0-NEXT: blr @@ -2063,8 +2491,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_14: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 14 ; CHECK-BE-O0-NEXT: blr @@ -2082,8 +2510,8 @@ ; ; CHECK-O0-LABEL: insert_byte_15: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-O0-NEXT: mtfprwz 0, 5 +; 
CHECK-O0-NEXT: mr 3, 5 +; CHECK-O0-NEXT: mtfprwz 0, 3 ; CHECK-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-O0-NEXT: vinsertb 2, 3, 0 ; CHECK-O0-NEXT: blr @@ -2096,8 +2524,8 @@ ; ; CHECK-BE-O0-LABEL: insert_byte_15: ; CHECK-BE-O0: # %bb.0: # %entry -; CHECK-BE-O0-NEXT: # kill: def $r5 killed $r5 killed $x5 -; CHECK-BE-O0-NEXT: mtfprwz 0, 5 +; CHECK-BE-O0-NEXT: mr 3, 5 +; CHECK-BE-O0-NEXT: mtfprwz 0, 3 ; CHECK-BE-O0-NEXT: xscpsgndp 35, 0, 0 ; CHECK-BE-O0-NEXT: vinsertb 2, 3, 15 ; CHECK-BE-O0-NEXT: blr Index: llvm/test/CodeGen/PowerPC/popcount.ll =================================================================== --- llvm/test/CodeGen/PowerPC/popcount.ll +++ llvm/test/CodeGen/PowerPC/popcount.ll @@ -5,11 +5,12 @@ define i8 @popcount128(i128* nocapture nonnull readonly %0) { ; CHECK-LABEL: popcount128: ; CHECK: # %bb.0: # %Entry -; CHECK-NEXT: ld 4, 0(3) -; CHECK-NEXT: ld 3, 8(3) -; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: mr 4, 3 +; CHECK-NEXT: ld 3, 0(4) +; CHECK-NEXT: ld 4, 8(4) ; CHECK-NEXT: popcntd 4, 4 -; CHECK-NEXT: add 3, 4, 3 +; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: add 3, 3, 4 ; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3 ; CHECK-NEXT: clrldi 3, 3, 56 ; CHECK-NEXT: blr @@ -27,17 +28,18 @@ define i16 @popcount256(i256* nocapture nonnull readonly %0) { ; CHECK-LABEL: popcount256: ; CHECK: # %bb.0: # %Entry -; CHECK-NEXT: ld 4, 0(3) -; CHECK-NEXT: ld 5, 8(3) -; CHECK-NEXT: ld 6, 16(3) -; CHECK-NEXT: ld 3, 24(3) -; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: mr 6, 3 +; CHECK-NEXT: ld 3, 0(6) +; CHECK-NEXT: ld 5, 8(6) +; CHECK-NEXT: ld 4, 16(6) +; CHECK-NEXT: ld 6, 24(6) ; CHECK-NEXT: popcntd 6, 6 -; CHECK-NEXT: add 3, 6, 3 -; CHECK-NEXT: popcntd 5, 5 ; CHECK-NEXT: popcntd 4, 4 -; CHECK-NEXT: add 4, 4, 5 -; CHECK-NEXT: add 3, 4, 3 +; CHECK-NEXT: add 4, 4, 6 +; CHECK-NEXT: popcntd 5, 5 +; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: add 3, 3, 5 +; CHECK-NEXT: add 3, 3, 4 ; CHECK-NEXT: # kill: def $r3 killed $r3 killed $x3 ; CHECK-NEXT: clrldi 3, 3, 48 ; CHECK-NEXT: blr @@ 
-57,18 +59,18 @@ ; CHECK-NEXT: xxlor 0, 34, 34 ; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0 ; CHECK-NEXT: mffprd 3, 0 -; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: popcntd 4, 3 ; CHECK-NEXT: xxswapd 0, 34 ; CHECK-NEXT: # kill: def $f0 killed $f0 killed $vsl0 -; CHECK-NEXT: mffprd 4, 0 -; CHECK-NEXT: popcntd 4, 4 -; CHECK-NEXT: add 3, 4, 3 +; CHECK-NEXT: mffprd 3, 0 +; CHECK-NEXT: popcntd 3, 3 +; CHECK-NEXT: add 3, 3, 4 ; CHECK-NEXT: mtfprd 0, 3 -; CHECK-NEXT: # kill: def $vsl0 killed $f0 +; CHECK-NEXT: fmr 1, 0 ; CHECK-NEXT: li 3, 0 -; CHECK-NEXT: mtfprd 1, 3 -; CHECK-NEXT: # kill: def $vsl1 killed $f1 -; CHECK-NEXT: xxmrghd 34, 1, 0 +; CHECK-NEXT: mtfprd 0, 3 +; CHECK-NEXT: # kill: def $vsl0 killed $f0 +; CHECK-NEXT: xxmrghd 34, 0, 1 ; CHECK-NEXT: blr Entry: %1 = tail call <1 x i128> @llvm.ctpop.v1.i128(<1 x i128> %0) Index: llvm/test/CodeGen/PowerPC/spill-nor0.ll =================================================================== --- llvm/test/CodeGen/PowerPC/spill-nor0.ll +++ llvm/test/CodeGen/PowerPC/spill-nor0.ll @@ -12,6 +12,12 @@ if.end: ; preds = %entry %0 = call i64 asm sideeffect "mr 3,$1\0A\09mr 4,$2\0A\09rotldi 0,0,3 ; rotldi 0,0,13\0A\09rotldi 0,0,61 ; rotldi 0,0,51\0A\09or 1,1,1\0A\09mr $0,3", "=b,b,b,~{cc},~{memory},~{r3},~{r4}"(i32 0, i64* undef) #0 + br i1 undef, label %end0, label %end1 ; need successor blocks to force spill + +end0: + unreachable + +end1: unreachable ; CHECK-LABEL: @_ZN4llvm3sys17RunningOnValgrindEv Index: llvm/test/CodeGen/PowerPC/spill-nor0.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/spill-nor0.mir @@ -0,0 +1,17 @@ +# RUN: llc -o - %s -mtriple=powerpc64-- -run-pass=regallocfast | FileCheck %s +--- +# CHECK-LABEL: name: func +name: func +tracksRegLiveness: true +body: | + bb.0: + %0 : gprc = LI 42 + %1 : gprc_nor0 = COPY %0 + ; CHECK: STW + + ; Clobber all regs to force a spill + NOP csr_noregs + + ; CHECK: LWZ + NOP implicit %1 +... 
Index: llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll =================================================================== --- llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll +++ llvm/test/CodeGen/PowerPC/stack-guard-reassign.ll @@ -6,11 +6,12 @@ ; CHECK: mflr 0 ; CHECK-NEXT: stw 0, 4(1) ; CHECK-NEXT: lis 0, -2 -; CHECK-NEXT: ori 0, 0, 65488 +; CHECK-NEXT: ori 0, 0, 65504 ; CHECK-NEXT: stwux 1, 1, 0 ; CHECK-NEXT: sub 0, 1, 0 ; CHECK-NEXT: lis 4, __stack_chk_guard@ha -; CHECK-NEXT: lwz 5, __stack_chk_guard@l(4) -; CHECK-NEXT: lis 6, 1 -; CHECK-NEXT: ori 6, 6, 44 -; CHECK-NEXT: stwx 5, 1, 6 +; CHECK-NEXT: stw 4, 16(1) +; CHECK-NEXT: lwz 4, __stack_chk_guard@l(4) +; CHECK-NEXT: lis 5, 1 +; CHECK-NEXT: ori 5, 5, 28 +; CHECK-NEXT: stwx 4, 1, 5 Index: llvm/test/CodeGen/PowerPC/vsx-args.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vsx-args.ll +++ llvm/test/CodeGen/PowerPC/vsx-args.ll @@ -24,11 +24,14 @@ ; CHECK: blr ; CHECK-FISL-LABEL: @main -; CHECK-FISL: stxvd2x 34 -; CHECK-FISL: vmr 2, 3 -; CHECK-FISL: vmr 3, 4 -; CHECK-FISL: lxvd2x 36 +; CHECK-FISL: stxvd2x 36, 1, 3 +; CHECK-FISL: vmr 4, 3 +; CHECK-FISL: lxvd2x 35, 1, 3 +; CHECK-FISL: 3, 144 +; CHECK-FISL: stxvd2x 36, 1, 3 +; CHECK-FISL: vmr 4, 2 ; CHECK-FISL: bl sv + ; CHECK-FISL: lxvd2x [[VC:[0-9]+]], ; CHECK-FISL: xvadddp 34, 34, [[VC]] ; CHECK-FISL: blr @@ -36,4 +39,3 @@ attributes #0 = { noinline nounwind readnone } attributes #1 = { nounwind } - Index: llvm/test/CodeGen/PowerPC/vsx.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vsx.ll +++ llvm/test/CodeGen/PowerPC/vsx.ll @@ -164,8 +164,7 @@ ; ; CHECK-FISL-LABEL: test6: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlxor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlxor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test6: @@ -193,8 +192,7 @@ ; ; CHECK-FISL-LABEL: test7: ; CHECK-FISL: # %bb.0: # %entry -; 
CHECK-FISL-NEXT: xxlxor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlxor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test7: @@ -250,8 +248,7 @@ ; ; CHECK-FISL-LABEL: test9: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test9: @@ -279,8 +276,7 @@ ; ; CHECK-FISL-LABEL: test10: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test10: @@ -336,8 +332,7 @@ ; ; CHECK-FISL-LABEL: test12: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxland vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxland v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test12: @@ -365,8 +360,7 @@ ; ; CHECK-FISL-LABEL: test13: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxland vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxland v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test13: @@ -424,10 +418,8 @@ ; ; CHECK-FISL-LABEL: test15: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v4, vs0, vs0 -; CHECK-FISL-NEXT: xxlnor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor v4, v2, v3 +; CHECK-FISL-NEXT: xxlnor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test15: @@ -456,10 +448,8 @@ ; ; CHECK-FISL-LABEL: test16: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v4, vs0, vs0 -; CHECK-FISL-NEXT: xxlnor vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor v4, v2, v3 +; CHECK-FISL-NEXT: xxlnor v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test16: @@ -518,10 +508,8 @@ ; ; CHECK-FISL-LABEL: test18: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlnor vs0, v3, 
v3 -; CHECK-FISL-NEXT: xxlor v4, vs0, vs0 -; CHECK-FISL-NEXT: xxlandc vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlnor v4, v3, v3 +; CHECK-FISL-NEXT: xxlandc v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test18: @@ -550,10 +538,8 @@ ; ; CHECK-FISL-LABEL: test19: ; CHECK-FISL: # %bb.0: # %entry -; CHECK-FISL-NEXT: xxlnor vs0, v3, v3 -; CHECK-FISL-NEXT: xxlor v4, vs0, vs0 -; CHECK-FISL-NEXT: xxlandc vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlnor v4, v3, v3 +; CHECK-FISL-NEXT: xxlandc v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test19: @@ -664,10 +650,10 @@ ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: xvcmpeqsp vs0, v4, v5 ; CHECK-FISL-NEXT: xvcmpeqsp vs1, v5, v5 +; CHECK-FISL-NEXT: xxlnor vs2, vs1, vs1 +; CHECK-FISL-NEXT: xvcmpeqsp vs1, v4, v4 ; CHECK-FISL-NEXT: xxlnor vs1, vs1, vs1 -; CHECK-FISL-NEXT: xvcmpeqsp vs2, v4, v4 -; CHECK-FISL-NEXT: xxlnor vs2, vs2, vs2 -; CHECK-FISL-NEXT: xxlor vs1, vs2, vs1 +; CHECK-FISL-NEXT: xxlor vs1, vs1, vs2 ; CHECK-FISL-NEXT: xxlor vs0, vs0, vs1 ; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-FISL-NEXT: blr @@ -708,8 +694,8 @@ ; CHECK-FISL-LABEL: test23: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: vcmpequh v4, v4, v5 -; CHECK-FISL-NEXT: xxsel vs0, v3, v2, v4 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor vs0, v4, v4 +; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test23: @@ -742,8 +728,8 @@ ; CHECK-FISL-LABEL: test24: ; CHECK-FISL: # %bb.0: # %entry ; CHECK-FISL-NEXT: vcmpequb v4, v4, v5 -; CHECK-FISL-NEXT: xxsel vs0, v3, v2, v4 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlor vs0, v4, v4 +; CHECK-FISL-NEXT: xxsel v2, v3, v2, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test24: @@ -835,17 +821,16 @@ ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: ld r3, -24(r1) -; CHECK-FISL-NEXT: ld r4, 
-40(r1) -; CHECK-FISL-NEXT: add r3, r4, r3 +; CHECK-FISL-NEXT: ld r4, -24(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: add r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: ld r3, -32(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: add r3, r4, r3 +; CHECK-FISL-NEXT: ld r4, -32(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: add r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test26: @@ -875,8 +860,7 @@ ; ; CHECK-FISL-LABEL: test27: ; CHECK-FISL: # %bb.0: -; CHECK-FISL-NEXT: xxland vs0, v2, v3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxland v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test27: @@ -1010,8 +994,7 @@ ; ; CHECK-FISL-LABEL: test30: ; CHECK-FISL: # %bb.0: -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test30: @@ -1129,10 +1112,10 @@ ; CHECK-FISL-LABEL: test32u: ; CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: li r4, 15 -; CHECK-FISL-NEXT: lvx v2, r3, r4 -; CHECK-FISL-NEXT: lvsl v3, 0, r3 -; CHECK-FISL-NEXT: lvx v4, 0, r3 -; CHECK-FISL-NEXT: vperm v2, v4, v2, v3 +; CHECK-FISL-NEXT: lvx v3, r3, r4 +; CHECK-FISL-NEXT: lvsl v4, 0, r3 +; CHECK-FISL-NEXT: lvx v2, 0, r3 +; CHECK-FISL-NEXT: vperm v2, v2, v3, v4 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test32u: @@ -1390,10 +1373,10 @@ ; CHECK-FISL-NEXT: fcfidus f0, f0 ; CHECK-FISL-NEXT: stfs f0, -64(r1) ; CHECK-FISL-NEXT: addi r3, r1, -48 -; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 -; CHECK-FISL-NEXT: addi r3, r1, -64 ; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 -; CHECK-FISL-NEXT: vmrghw v2, v3, v2 +; CHECK-FISL-NEXT: addi r3, r1, -64 +; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 +; CHECK-FISL-NEXT: vmrghw v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test44: @@ 
-1472,10 +1455,10 @@ ; CHECK-FISL-NEXT: fcfids f0, f0 ; CHECK-FISL-NEXT: stfs f0, -64(r1) ; CHECK-FISL-NEXT: addi r3, r1, -48 -; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 -; CHECK-FISL-NEXT: addi r3, r1, -64 ; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 -; CHECK-FISL-NEXT: vmrghw v2, v3, v2 +; CHECK-FISL-NEXT: addi r3, r1, -64 +; CHECK-FISL-NEXT: lxvw4x v2, 0, r3 +; CHECK-FISL-NEXT: vmrghw v2, v2, v3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test45: @@ -1548,8 +1531,7 @@ ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test46: @@ -1616,8 +1598,7 @@ ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test47: @@ -1859,17 +1840,16 @@ ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: sld r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: sld r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: sld r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: sld r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test60: @@ -1925,17 +1905,16 @@ ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; 
CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: srd r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: srd r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: srd r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: srd r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test61: @@ -1991,17 +1970,16 @@ ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r3, -20(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: srad r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -20(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: srad r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r3, -28(r1) -; CHECK-FISL-NEXT: ld r4, -48(r1) -; CHECK-FISL-NEXT: srad r3, r4, r3 +; CHECK-FISL-NEXT: lwz r4, -28(r1) +; CHECK-FISL-NEXT: ld r3, -48(r1) +; CHECK-FISL-NEXT: srad r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test62: @@ -2027,7 +2005,6 @@ ; ; CHECK-FISL-LABEL: test63: ; CHECK-FISL: # %bb.0: -; CHECK-FISL-NEXT: # kill: def $vf2 killed $vf2 killed $v2 ; CHECK-FISL-NEXT: xxlor f1, v2, v2 ; CHECK-FISL-NEXT: blr ; @@ -2059,7 +2036,6 @@ ; CHECK-FISL-LABEL: test64: ; CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: xxswapd vs0, v2 -; CHECK-FISL-NEXT: # kill: def $f0 killed $f0 killed $vsl0 ; CHECK-FISL-NEXT: fmr f1, f0 ; CHECK-FISL-NEXT: blr ; @@ -2117,8 +2093,7 @@ ; CHECK-FISL-LABEL: test66: ; 
CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: vcmpequw v2, v2, v3 -; CHECK-FISL-NEXT: xxlnor vs0, v2, v2 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: xxlnor v2, v2, v2 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test66: @@ -2184,21 +2159,20 @@ ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: ld r3, -24(r1) -; CHECK-FISL-NEXT: ld r4, -40(r1) -; CHECK-FISL-NEXT: cmpld r4, r3 -; CHECK-FISL-NEXT: li r3, 0 -; CHECK-FISL-NEXT: li r4, -1 -; CHECK-FISL-NEXT: isellt r5, r4, r3 +; CHECK-FISL-NEXT: ld r4, -24(r1) +; CHECK-FISL-NEXT: ld r3, -40(r1) +; CHECK-FISL-NEXT: cmpld r3, r4 +; CHECK-FISL-NEXT: li r4, 0 +; CHECK-FISL-NEXT: li r3, -1 +; CHECK-FISL-NEXT: isellt r5, r3, r4 ; CHECK-FISL-NEXT: std r5, -8(r1) -; CHECK-FISL-NEXT: ld r5, -32(r1) -; CHECK-FISL-NEXT: ld r6, -48(r1) -; CHECK-FISL-NEXT: cmpld r6, r5 -; CHECK-FISL-NEXT: isellt r3, r4, r3 +; CHECK-FISL-NEXT: ld r6, -32(r1) +; CHECK-FISL-NEXT: ld r5, -48(r1) +; CHECK-FISL-NEXT: cmpld r5, r6 +; CHECK-FISL-NEXT: isellt r3, r3, r4 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test67: @@ -2284,15 +2258,15 @@ ; CHECK-FISL-NEXT: addi r3, r3, .LCPI63_0@toc@l ; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 ; CHECK-FISL-NEXT: vperm v2, v2, v2, v3 +; CHECK-FISL-NEXT: xxlor vs0, v2, v2 ; CHECK-FISL-NEXT: addi r3, r1, -32 -; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 +; CHECK-FISL-NEXT: stxvd2x vs0, 0, r3 ; CHECK-FISL-NEXT: lha r3, -18(r1) ; CHECK-FISL-NEXT: std r3, -8(r1) ; CHECK-FISL-NEXT: lha r3, -26(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: xvcvsxddp v2, v2 ; CHECK-FISL-NEXT: blr ; @@ -2362,8 +2336,9 @@ ; 
CHECK-FISL-NEXT: addi r3, r3, .LCPI64_0@toc@l ; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 ; CHECK-FISL-NEXT: vperm v2, v2, v2, v3 +; CHECK-FISL-NEXT: xxlor vs0, v2, v2 ; CHECK-FISL-NEXT: addi r3, r1, -32 -; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 +; CHECK-FISL-NEXT: stxvd2x vs0, 0, r3 ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: extsb r3, r3 ; CHECK-FISL-NEXT: std r3, -8(r1) @@ -2371,8 +2346,7 @@ ; CHECK-FISL-NEXT: extsb r3, r3 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 +; CHECK-FISL-NEXT: lxvd2x v2, 0, r3 ; CHECK-FISL-NEXT: xvcvsxddp v2, v2 ; CHECK-FISL-NEXT: blr ; @@ -2494,16 +2468,16 @@ ; ; CHECK-FISL-LABEL: test82: ; CHECK-FISL: # %bb.0: # %entry +; CHECK-FISL-NEXT: stfd f2, -16(r1) # 8-byte Folded Spill +; CHECK-FISL-NEXT: fmr f2, f1 ; CHECK-FISL-NEXT: xscmpudp cr0, f3, f4 ; CHECK-FISL-NEXT: stfd f2, -8(r1) # 8-byte Folded Spill -; CHECK-FISL-NEXT: stfd f1, -16(r1) # 8-byte Folded Spill ; CHECK-FISL-NEXT: beq cr0, .LBB67_2 ; CHECK-FISL-NEXT: # %bb.1: # %entry -; CHECK-FISL-NEXT: lfd f0, -8(r1) # 8-byte Folded Reload -; CHECK-FISL-NEXT: stfd f0, -16(r1) # 8-byte Folded Spill -; CHECK-FISL-NEXT: .LBB67_2: # %entry ; CHECK-FISL-NEXT: lfd f0, -16(r1) # 8-byte Folded Reload -; CHECK-FISL-NEXT: fmr f1, f0 +; CHECK-FISL-NEXT: stfd f0, -8(r1) # 8-byte Folded Spill +; CHECK-FISL-NEXT: .LBB67_2: # %entry +; CHECK-FISL-NEXT: lfd f1, -8(r1) # 8-byte Folded Reload ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test82: Index: llvm/test/CodeGen/SPARC/fp16-promote.ll =================================================================== --- llvm/test/CodeGen/SPARC/fp16-promote.ll +++ llvm/test/CodeGen/SPARC/fp16-promote.ll @@ -124,15 +124,12 @@ ; ; V8-UNOPT-LABEL: test_fptrunc_float: ; V8-UNOPT: ! 
%bb.0: -; V8-UNOPT-NEXT: save %sp, -104, %sp -; V8-UNOPT-NEXT: st %i0, [%fp+-4] -; V8-UNOPT-NEXT: ld [%fp+-4], %f0 +; V8-UNOPT-NEXT: save %sp, -96, %sp ; V8-UNOPT-NEXT: mov %i0, %o0 -; V8-UNOPT-NEXT: st %i1, [%fp+-8] ! 4-byte Folded Spill +; V8-UNOPT-NEXT: st %o0, [%fp+-4] ; V8-UNOPT-NEXT: call __gnu_f2h_ieee -; V8-UNOPT-NEXT: st %f0, [%fp+-12] -; V8-UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload -; V8-UNOPT-NEXT: sth %o0, [%i0] +; V8-UNOPT-NEXT: ld [%fp+-4], %f0 +; V8-UNOPT-NEXT: sth %o0, [%i1] ; V8-UNOPT-NEXT: ret ; V8-UNOPT-NEXT: restore ; @@ -176,21 +173,19 @@ ; V8-UNOPT-LABEL: test_fptrunc_double: ; V8-UNOPT: ! %bb.0: ; V8-UNOPT-NEXT: save %sp, -112, %sp -; V8-UNOPT-NEXT: ! implicit-def: $i4_i5 +; V8-UNOPT-NEXT: mov %i1, %i3 ; V8-UNOPT-NEXT: mov %i0, %i4 -; V8-UNOPT-NEXT: mov %i1, %i5 -; V8-UNOPT-NEXT: std %i4, [%fp+-8] +; V8-UNOPT-NEXT: ! implicit-def: $i0_i1 +; V8-UNOPT-NEXT: mov %i4, %i0 +; V8-UNOPT-NEXT: mov %i3, %i1 +; V8-UNOPT-NEXT: std %i0, [%fp+-8] ; V8-UNOPT-NEXT: ldd [%fp+-8], %f0 ; V8-UNOPT-NEXT: std %f0, [%fp+-16] ; V8-UNOPT-NEXT: ldd [%fp+-16], %i0 -; V8-UNOPT-NEXT: mov %i0, %i3 -; V8-UNOPT-NEXT: ! kill: def $i1 killed $i1 killed $i0_i1 -; V8-UNOPT-NEXT: mov %i3, %o0 -; V8-UNOPT-NEXT: mov %i1, %o1 +; V8-UNOPT-NEXT: mov %i0, %o0 ; V8-UNOPT-NEXT: call __truncdfhf2 -; V8-UNOPT-NEXT: st %i2, [%fp+-20] -; V8-UNOPT-NEXT: ld [%fp+-20], %i0 ! 4-byte Folded Reload -; V8-UNOPT-NEXT: sth %o0, [%i0] +; V8-UNOPT-NEXT: mov %i1, %o1 +; V8-UNOPT-NEXT: sth %o0, [%i2] ; V8-UNOPT-NEXT: ret ; V8-UNOPT-NEXT: restore ; @@ -241,21 +236,18 @@ ; ; V8-UNOPT-LABEL: test_fadd: ; V8-UNOPT: ! %bb.0: -; V8-UNOPT-NEXT: save %sp, -112, %sp -; V8-UNOPT-NEXT: lduh [%i0], %o0 -; V8-UNOPT-NEXT: st %i1, [%fp+-8] ! 4-byte Folded Spill +; V8-UNOPT-NEXT: save %sp, -104, %sp ; V8-UNOPT-NEXT: call __gnu_h2f_ieee -; V8-UNOPT-NEXT: st %i0, [%fp+-12] -; V8-UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload ; V8-UNOPT-NEXT: lduh [%i0], %o0 +; V8-UNOPT-NEXT: st %f0, [%fp+-8] ! 
4-byte Folded Spill ; V8-UNOPT-NEXT: call __gnu_h2f_ieee -; V8-UNOPT-NEXT: st %f0, [%fp+-16] -; V8-UNOPT-NEXT: ld [%fp+-16], %f1 ! 4-byte Folded Reload -; V8-UNOPT-NEXT: fadds %f1, %f0, %f0 +; V8-UNOPT-NEXT: lduh [%i1], %o0 +; V8-UNOPT-NEXT: fmovs %f0, %f1 +; V8-UNOPT-NEXT: ld [%fp+-8], %f0 ! 4-byte Folded Reload +; V8-UNOPT-NEXT: fadds %f0, %f1, %f0 ; V8-UNOPT-NEXT: st %f0, [%fp+-4] ; V8-UNOPT-NEXT: call __gnu_f2h_ieee ; V8-UNOPT-NEXT: ld [%fp+-4], %o0 -; V8-UNOPT-NEXT: ld [%fp+-12], %i0 ! 4-byte Folded Reload ; V8-UNOPT-NEXT: sth %o0, [%i0] ; V8-UNOPT-NEXT: ret ; V8-UNOPT-NEXT: restore @@ -318,21 +310,18 @@ ; ; V8-UNOPT-LABEL: test_fmul: ; V8-UNOPT: ! %bb.0: -; V8-UNOPT-NEXT: save %sp, -112, %sp -; V8-UNOPT-NEXT: lduh [%i0], %o0 -; V8-UNOPT-NEXT: st %i1, [%fp+-8] ! 4-byte Folded Spill +; V8-UNOPT-NEXT: save %sp, -104, %sp ; V8-UNOPT-NEXT: call __gnu_h2f_ieee -; V8-UNOPT-NEXT: st %i0, [%fp+-12] -; V8-UNOPT-NEXT: ld [%fp+-8], %i0 ! 4-byte Folded Reload ; V8-UNOPT-NEXT: lduh [%i0], %o0 +; V8-UNOPT-NEXT: st %f0, [%fp+-8] ! 4-byte Folded Spill ; V8-UNOPT-NEXT: call __gnu_h2f_ieee -; V8-UNOPT-NEXT: st %f0, [%fp+-16] -; V8-UNOPT-NEXT: ld [%fp+-16], %f1 ! 4-byte Folded Reload -; V8-UNOPT-NEXT: fmuls %f1, %f0, %f0 +; V8-UNOPT-NEXT: lduh [%i1], %o0 +; V8-UNOPT-NEXT: fmovs %f0, %f1 +; V8-UNOPT-NEXT: ld [%fp+-8], %f0 ! 4-byte Folded Reload +; V8-UNOPT-NEXT: fmuls %f0, %f1, %f0 ; V8-UNOPT-NEXT: st %f0, [%fp+-4] ; V8-UNOPT-NEXT: call __gnu_f2h_ieee ; V8-UNOPT-NEXT: ld [%fp+-4], %o0 -; V8-UNOPT-NEXT: ld [%fp+-12], %i0 ! 
4-byte Folded Reload ; V8-UNOPT-NEXT: sth %o0, [%i0] ; V8-UNOPT-NEXT: ret ; V8-UNOPT-NEXT: restore Index: llvm/test/CodeGen/SystemZ/swift-return.ll =================================================================== --- llvm/test/CodeGen/SystemZ/swift-return.ll +++ llvm/test/CodeGen/SystemZ/swift-return.ll @@ -14,10 +14,9 @@ ; CHECK-O0-LABEL: test ; CHECK-O0: st %r2 ; CHECK-O0: brasl %r14, gen -; CHECK-O0-DAG: lhr %[[REG1:r[0-9]+]], %r2 +; CHECK-O0-DAG: lhr %r2, %r2 ; CHECK-O0-DAG: lbr %[[REG2:r[0-9]+]], %r3 -; CHECK-O0: ar %[[REG1]], %[[REG2]] -; CHECK-O0: lr %r2, %[[REG1]] +; CHECK-O0: ar %r2, %[[REG2]] define i16 @test(i32 %key) { entry: %key.addr = alloca i32, align 4 @@ -61,7 +60,6 @@ ; CHECK-O0: ar ; CHECK-O0: ar ; CHECK-O0: ar -; CHECK-O0: lr %r2 define i32 @test2(i32 %key) #0 { entry: %key.addr = alloca i32, align 4 Index: llvm/test/CodeGen/SystemZ/swifterror.ll =================================================================== --- llvm/test/CodeGen/SystemZ/swifterror.ll +++ llvm/test/CodeGen/SystemZ/swifterror.ll @@ -16,7 +16,7 @@ ; CHECK-O0-LABEL: foo: ; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc -; CHECK-O0: lgr %r0, %r2 +; CHECK-O0: lgr [[T0:%r[0-9]+]], %r2 ; CHECK-O0: mvi 8(%r2), 1 entry: %call = call i8* @malloc(i64 16) @@ -118,19 +118,17 @@ ; CHECK-NOT: %r9 ; CHECK: br %r14 ; CHECK-O0-LABEL: foo_if: -; CHECK-O0: chi %r2, 0 ; spill to stack ; CHECK-O0: stg %r9, [[OFFS:[0-9]+]](%r15) +; CHECK-O0: chi %r2, 0 ; CHECK-O0: je ; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc -; CHECK-O0: lgr %r[[REG1:[0-9]+]], %r2 +; CHECK-O0: lgr %r9, %r2 ; CHECK-O0: mvi 8(%r2), 1 -; CHECK-O0: lgr %r9, %r[[REG1]] ; CHECK-O0: br %r14 ; reload from stack -; CHECK-O0: lg %r[[REG2:[0-9]+]], [[OFFS]](%r15) -; CHECK-O0: lgr %r9, %r[[REG2]] +; CHECK-O0: lg %r9, [[OFFS]](%r15) ; CHECK-O0: br %r14 entry: %cond = icmp ne i32 %cc, 0 @@ -169,11 +167,10 @@ ; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc ; CHECK-O0: lgr %r[[REG1:[0-9]+]], %r2 -; CHECK-O0: mvi 
8(%r2), 1 +; CHECK-O0: mvi 8(%r[[REG1]]), 1 ; CHECK-O0: jnh ; reload from stack -; CHECK-O0: lg %r[[REG2:[0-9]+]], [[OFFS:[0-9]+]](%r15) -; CHECK-O0: lgr %r9, %r[[REG2]] +; CHECK-O0: lg %r9, [[OFFS:[0-9]+]](%r15) ; CHECK-O0: br %r14 entry: br label %bb_loop @@ -214,18 +211,17 @@ ; CHECK: br %r14 ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: lghi %r{{.*}}, 16 ; spill sret to stack -; CHECK-O0: stg %r2, [[OFFS1:[0-9]+]](%r15) -; CHECK-O0: lgr %r2, %r{{.*}} -; CHECK-O0: st %r3, [[OFFS2:[0-9]+]](%r15) +; CHECK-O0-DAG: stg %r2, [[OFFS1:[0-9]+]](%r15) +; CHECK-O0-DAG: st %r3, [[OFFS2:[0-9]+]](%r15) +; CHECK-O0: lghi %r2, 16 ; CHECK-O0: brasl %r14, malloc -; CHECK-O0: lgr {{.*}}, %r2 -; CHECK-O0: mvi 8(%r2), 1 +; CHECK-O0-DAG: lgr %r[[REG3:[0-9]+]], %r2 +; CHECK-O0-DAG: mvi 8(%r[[REG3]]), 1 ; CHECK-O0-DAG: lg %r[[REG1:[0-9]+]], [[OFFS1]](%r15) +; CHECK-O0-DAG: lgr %r9, %r[[REG3]] ; CHECK-O0-DAG: l %r[[REG2:[0-9]+]], [[OFFS2]](%r15) ; CHECK-O0: st %r[[REG2]], 4(%r[[REG1]]) -; CHECK-O0: lgr %r9, {{.*}} ; CHECK-O0: br %r14 entry: %call = call i8* @malloc(i64 16) @@ -255,8 +251,6 @@ ; CHECK-O0-LABEL: caller3: ; CHECK-O0: lghi %r9, 0 ; CHECK-O0: lhi %r3, 1 -; CHECK-O0: stg %r2, {{.*}}(%r15) -; CHECK-O0: lgr %r2, {{.*}} ; CHECK-O0: brasl %r14, foo_sret ; CHECK-O0: lgr {{.*}}, %r9 ; CHECK-O0: cghi %r9, 0 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/branch-targets.ll @@ -404,7 +404,7 @@ } ; CHECK-MID: check_negated_xor_wls -; CHECK-MID: t2WhileLoopStart killed renamable $r2, %bb.3 +; CHECK-MID: t2WhileLoopStart renamable $r2, %bb.3 ; CHECK-MID: tB %bb.1 ; CHECK-MID: bb.1.while.body.preheader: ; CHECK-MID: $lr = t2LoopDec killed renamable $lr, 1 @@ -437,7 +437,7 @@ } ; CHECK-MID: check_negated_cmp_wls -; CHECK-MID: t2WhileLoopStart killed renamable $r2, %bb.3 +; CHECK-MID: t2WhileLoopStart 
renamable $r2, %bb.3 ; CHECK-MID: tB %bb.1 ; CHECK-MID: bb.1.while.body.preheader: ; CHECK-MID: $lr = t2LoopDec killed renamable $lr, 1 Index: llvm/test/CodeGen/Thumb2/high-reg-spill.mir =================================================================== --- llvm/test/CodeGen/Thumb2/high-reg-spill.mir +++ llvm/test/CodeGen/Thumb2/high-reg-spill.mir @@ -38,10 +38,8 @@ bb.0.entry: ; CHECK-LABEL: name: constraint_h ; CHECK: renamable $r0 = tLDRspi %stack.0.i, 0, 14 /* CC::al */, $noreg :: (dereferenceable load 4 from %ir.i) - ; CHECK: renamable $r12 = COPY killed renamable $r0 - ; CHECK: t2STRi12 killed $r12, %stack.1, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) - ; CHECK: $r8 = t2LDRi12 %stack.1, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) - ; CHECK: INLINEASM &"@ $0", 1 /* sideeffect attdialect */, 589833 /* reguse:GPRnopc */, renamable $r8, 12 /* clobber */, implicit-def early-clobber $r12 + ; CHECK: renamable $r8 = COPY killed renamable $r0 + ; CHECK: INLINEASM &"@ $0", 1 /* sideeffect attdialect */, 589833 /* reguse:GPRnopc */, killed renamable $r8, 12 /* clobber */, implicit-def dead early-clobber $r12 ; CHECK: tBX_RET 14 /* CC::al */, $noreg %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i) %0:hgpr = COPY %1 Index: llvm/test/CodeGen/Thumb2/mve-vector-spill.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vector-spill.ll +++ llvm/test/CodeGen/Thumb2/mve-vector-spill.ll @@ -10,13 +10,11 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 
4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -33,13 +31,11 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrh.16 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -56,13 +52,11 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrb.8 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -79,13 +73,11 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -102,13 +94,11 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: 
vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -125,13 +115,11 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .pad #40 ; CHECK-NEXT: sub sp, #40 -; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: str r0, [sp, #36] @ 4-byte Spill +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill -; CHECK-NEXT: vstrw.32 q1, [sp] @ 16-byte Spill ; CHECK-NEXT: bl external_function +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #36] @ 4-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] ; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} @@ -146,15 +134,15 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: .pad #24 -; CHECK-NEXT: sub sp, #24 -; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill +; CHECK-NEXT: .pad #40 +; CHECK-NEXT: sub sp, #40 +; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill ; CHECK-NEXT: bl external_function -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload -; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload +; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload ; CHECK-NEXT: vstrw.32 q0, [r0] -; CHECK-NEXT: add sp, #24 +; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: pop {r7, pc} entry: call void @external_function() Index: llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll 
=================================================================== --- llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll +++ llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll @@ -20,18 +20,15 @@ ; CHECK-NEXT: movb $15, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl $8, %ecx -; CHECK-NEXT: leal {{[0-9]+}}(%esp), %edx ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esi +; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: movl %eax, %edi -; CHECK-NEXT: movl %edx, %esi ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: addl $36, %ecx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload +; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: addl $36, %edi ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) ; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl ; CHECK-NEXT: movb %cl, 32(%eax) @@ -42,11 +39,11 @@ ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: ## %bb.1: ## %return ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %ecx -; CHECK-NEXT: movl (%ecx), %ecx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-NEXT: cmpl %edx, %ecx ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %eax +; CHECK-NEXT: movl (%eax), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: cmpl %ecx, %eax ; CHECK-NEXT: jne LBB0_3 ; CHECK-NEXT: ## %bb.2: ## %SP_return ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload Index: llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll =================================================================== --- 
llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll +++ llvm/test/CodeGen/X86/2010-06-28-FastAllocTiedOperand.ll @@ -10,12 +10,8 @@ define i32 @func(i8* %s) nounwind ssp { ; CHECK-LABEL: func: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: pushl %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, (%esp) ## 4-byte Spill -; CHECK-NEXT: movl (%esp), %ecx ## 4-byte Reload +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: ## InlineAsm Start ; CHECK-NEXT: arg0 %eax ; CHECK-NEXT: arg1 %ecx @@ -23,10 +19,7 @@ ; CHECK-NEXT: arg3 %esi ; CHECK-NEXT: arg4 %ecx ; CHECK-NEXT: ## InlineAsm End -; CHECK-NEXT: movl %ecx, %edi -; CHECK-NEXT: addl $4, %esp ; CHECK-NEXT: popl %esi -; CHECK-NEXT: popl %edi ; CHECK-NEXT: retl entry: %0 = tail call %asmtype asm "arg0 $0\0A\09arg1 $1\0A\09arg2 $2\0A\09arg3 $3\0A\09arg4 $4", "={ax},=r,=r,=r,1,~{dirflag},~{fpsr},~{flags}"(i8* %s) nounwind, !srcloc !0 ; <%0> [#uses=1] Index: llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll =================================================================== --- llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll +++ llvm/test/CodeGen/X86/2013-10-14-FastISel-incorrect-vreg.ll @@ -15,20 +15,20 @@ ; CHECK-LABEL: @test_bitcast ; Load the value of the function pointer: %loaded_ptr -; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %arg2. ; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %loaded_ptr. ; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] ; Perform the indirect call. -; Load the first argument -; CHECK: movq [[ARG2_SLOT]], %rdi -; Load the second argument -; CHECK: movq [[ARG2_SLOT]], %rsi -; Load the third argument -; CHECK: movq [[ARG2_SLOT]], %rdx ; Load the function pointer. 
; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; Load the third argument +; CHECK: movq [[ARG2_SLOT]], %rdx +; Load the first argument +; CHECK: movq %rdx, %rdi +; Load the second argument +; CHECK: movq %rdx, %rsi ; Call. ; CHECK: callq *[[FCT_PTR]] ; CHECK: ret @@ -54,20 +54,20 @@ ; CHECK-LABEL: @test_inttoptr ; Load the value of the function pointer: %loaded_ptr -; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] -; Spill %arg2. ; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] ; Spill %loaded_ptr. +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] +; Spill %arg2. ; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] ; Perform the indirect call. -; Load the first argument -; CHECK: movq [[ARG2_SLOT]], %rdi -; Load the second argument -; CHECK: movq [[ARG2_SLOT]], %rsi -; Load the third argument -; CHECK: movq [[ARG2_SLOT]], %rdx ; Load the function pointer. ; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; Load the third argument +; CHECK: movq [[ARG2_SLOT]], %rdx +; Load the first argument +; CHECK: movq %rdx, %rdi +; Load the second argument +; CHECK: movq %rdx, %rsi ; Call. ; CHECK: callq *[[FCT_PTR]] ; CHECK: ret @@ -92,21 +92,21 @@ } ; CHECK-LABEL: @test_ptrtoint -; Load the value of the function pointer: %loaded_ptr -; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %arg2. ; CHECK: movq %rdx, [[ARG2_SLOT:[0-9]*\(%[a-z]+\)]] +; Load the value of the function pointer: %loaded_ptr +; CHECK: movq (%rdi), [[LOADED_PTR:%[a-z]+]] ; Spill %loaded_ptr. ; CHECK: movq [[LOADED_PTR]], [[LOADED_PTR_SLOT:[0-9]*\(%[a-z]+\)]] ; Perform the indirect call. -; Load the first argument -; CHECK: movq [[ARG2_SLOT]], %rdi -; Load the second argument -; CHECK: movq [[ARG2_SLOT]], %rsi -; Load the third argument -; CHECK: movq [[ARG2_SLOT]], %rdx ; Load the function pointer. 
; CHECK: movq [[LOADED_PTR_SLOT]], [[FCT_PTR:%[a-z]+]] +; Load the third argument +; CHECK: movq [[ARG2_SLOT]], %rdx +; Load the first argument +; CHECK: movq %rdx, %rdi +; Load the second argument +; CHECK: movq %rdx, %rsi ; Call. ; CHECK: callq *[[FCT_PTR]] ; CHECK: ret Index: llvm/test/CodeGen/X86/atomic-monotonic.ll =================================================================== --- llvm/test/CodeGen/X86/atomic-monotonic.ll +++ llvm/test/CodeGen/X86/atomic-monotonic.ll @@ -14,8 +14,8 @@ define void @store_i8(i8* %ptr, i8 %v) { ; CHECK-O0-LABEL: store_i8: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $esi -; CHECK-O0-NEXT: movb %sil, (%rdi) +; CHECK-O0-NEXT: movb %sil, %al +; CHECK-O0-NEXT: movb %al, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i8: @@ -44,8 +44,8 @@ define void @store_i16(i16* %ptr, i16 %v) { ; CHECK-O0-LABEL: store_i16: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi -; CHECK-O0-NEXT: movw %si, (%rdi) +; CHECK-O0-NEXT: movw %si, %ax +; CHECK-O0-NEXT: movw %ax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i16: Index: llvm/test/CodeGen/X86/atomic-unordered.ll =================================================================== --- llvm/test/CodeGen/X86/atomic-unordered.ll +++ llvm/test/CodeGen/X86/atomic-unordered.ll @@ -16,8 +16,8 @@ define void @store_i8(i8* %ptr, i8 %v) { ; CHECK-O0-LABEL: store_i8: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $esi -; CHECK-O0-NEXT: movb %sil, (%rdi) +; CHECK-O0-NEXT: movb %sil, %al +; CHECK-O0-NEXT: movb %al, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i8: @@ -46,8 +46,8 @@ define void @store_i16(i16* %ptr, i16 %v) { ; CHECK-O0-LABEL: store_i16: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi -; CHECK-O0-NEXT: movw %si, (%rdi) +; CHECK-O0-NEXT: movw %si, %ax +; CHECK-O0-NEXT: movw %ax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: store_i16: @@ -231,11 
+231,10 @@ ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: .cfi_offset %rbx, -16 ; CHECK-O0-NEXT: xorl %eax, %eax -; CHECK-O0-NEXT: # kill: def $rax killed $eax -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; CHECK-O0-NEXT: movl %eax, %ebx +; CHECK-O0-NEXT: movq %rbx, %rax +; CHECK-O0-NEXT: movq %rbx, %rdx +; CHECK-O0-NEXT: movq %rbx, %rcx ; CHECK-O0-NEXT: lock cmpxchg16b (%rdi) ; CHECK-O0-NEXT: popq %rbx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 @@ -264,24 +263,24 @@ ; CHECK-O0-NEXT: pushq %rbx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: .cfi_offset %rbx, -16 -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movq 8(%rdi), %rcx ; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movq (%rdi), %rax +; CHECK-O0-NEXT: movq 8(%rdi), %rdx ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: jmp .LBB16_1 ; CHECK-O0-NEXT: .LBB16_1: # %atomicrmw.start ; CHECK-O0-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; 
CHECK-O0-NEXT: lock cmpxchg16b (%rsi) -; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: jne .LBB16_1 ; CHECK-O0-NEXT: jmp .LBB16_2 ; CHECK-O0-NEXT: .LBB16_2: # %atomicrmw.end @@ -317,24 +316,22 @@ ; CHECK-O0-NEXT: subq $56, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 64 ; CHECK-O0-NEXT: movq %rdi, %rax -; CHECK-O0-NEXT: movl $32, %ecx -; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx -; CHECK-O0-NEXT: xorl %r8d, %r8d -; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movq %rcx, %rdi -; CHECK-O0-NEXT: movl %r8d, %ecx ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movl $32, %edi +; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: callq __atomic_load -; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload -; CHECK-O0-NEXT: movq %rsi, 24(%rdi) -; CHECK-O0-NEXT: movq %rdx, 16(%rdi) -; CHECK-O0-NEXT: movq %rcx, 8(%rdi) -; CHECK-O0-NEXT: movq %rax, (%rdi) -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload +; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %r8 +; CHECK-O0-NEXT: movq %r8, 24(%rdi) +; CHECK-O0-NEXT: movq %rsi, 16(%rdi) +; CHECK-O0-NEXT: movq %rdx, 8(%rdi) +; CHECK-O0-NEXT: movq %rcx, (%rdi) ; CHECK-O0-NEXT: addq $56, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -369,18 +366,18 @@ ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: subq $40, %rsp ; CHECK-O0-NEXT: 
.cfi_def_cfa_offset 48 -; CHECK-O0-NEXT: xorl %eax, %eax -; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %r9 -; CHECK-O0-NEXT: movq %rsi, {{[0-9]+}}(%rsp) -; CHECK-O0-NEXT: movq %rdx, {{[0-9]+}}(%rsp) -; CHECK-O0-NEXT: movq %rcx, {{[0-9]+}}(%rsp) +; CHECK-O0-NEXT: movq %rcx, %rax +; CHECK-O0-NEXT: movq %rdx, (%rsp) # 8-byte Spill +; CHECK-O0-NEXT: movq %rsi, %r9 +; CHECK-O0-NEXT: movq %rdi, %rsi +; CHECK-O0-NEXT: movq (%rsp), %rdi # 8-byte Reload +; CHECK-O0-NEXT: xorl %ecx, %ecx +; CHECK-O0-NEXT: leaq {{[0-9]+}}(%rsp), %rdx +; CHECK-O0-NEXT: movq %r9, {{[0-9]+}}(%rsp) +; CHECK-O0-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; CHECK-O0-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movq %r8, {{[0-9]+}}(%rsp) -; CHECK-O0-NEXT: movl $32, %ecx -; CHECK-O0-NEXT: movq %rdi, (%rsp) # 8-byte Spill -; CHECK-O0-NEXT: movq %rcx, %rdi -; CHECK-O0-NEXT: movq (%rsp), %rsi # 8-byte Reload -; CHECK-O0-NEXT: movq %r9, %rdx -; CHECK-O0-NEXT: movl %eax, %ecx +; CHECK-O0-NEXT: movl $32, %edi ; CHECK-O0-NEXT: callq __atomic_store ; CHECK-O0-NEXT: addq $40, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 @@ -411,10 +408,10 @@ define void @vec_store(i32* %p0, <2 x i32> %vec) { ; CHECK-O0-CUR-LABEL: vec_store: ; CHECK-O0-CUR: # %bb.0: -; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax -; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %ecx -; CHECK-O0-CUR-NEXT: movl %eax, (%rdi) -; CHECK-O0-CUR-NEXT: movl %ecx, 4(%rdi) +; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx +; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %eax +; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi) +; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: vec_store: @@ -448,10 +445,10 @@ define void @vec_store_unaligned(i32* %p0, <2 x i32> %vec) { ; CHECK-O0-CUR-LABEL: vec_store_unaligned: ; CHECK-O0-CUR: # %bb.0: -; CHECK-O0-CUR-NEXT: vmovd %xmm0, %eax -; CHECK-O0-CUR-NEXT: vpextrd $1, %xmm0, %ecx -; CHECK-O0-CUR-NEXT: movl %eax, (%rdi) -; CHECK-O0-CUR-NEXT: movl %ecx, 4(%rdi) +; CHECK-O0-CUR-NEXT: vmovd %xmm0, %ecx +; CHECK-O0-CUR-NEXT: vpextrd 
$1, %xmm0, %eax +; CHECK-O0-CUR-NEXT: movl %ecx, (%rdi) +; CHECK-O0-CUR-NEXT: movl %eax, 4(%rdi) ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: vec_store_unaligned: @@ -594,17 +591,11 @@ } define i64 @load_fold_add2(i64* %p, i64 %v2) { -; CHECK-O0-LABEL: load_fold_add2: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: addq (%rdi), %rsi -; CHECK-O0-NEXT: movq %rsi, %rax -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_fold_add2: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq %rsi, %rax -; CHECK-O3-NEXT: addq (%rdi), %rax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_fold_add2: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: addq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = add i64 %v, %v2 ret i64 %ret @@ -694,17 +685,11 @@ } define i64 @load_fold_mul2(i64* %p, i64 %v2) { -; CHECK-O0-LABEL: load_fold_mul2: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: imulq (%rdi), %rsi -; CHECK-O0-NEXT: movq %rsi, %rax -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_fold_mul2: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq %rsi, %rax -; CHECK-O3-NEXT: imulq (%rdi), %rax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_fold_mul2: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: imulq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = mul i64 %v, %v2 ret i64 %ret @@ -1129,8 +1114,8 @@ define i64 @load_fold_shl2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_shl2: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq %rsi, %rcx +; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: shlq %cl, %rax ; CHECK-O0-NEXT: retq @@ -1179,8 +1164,8 @@ define i64 @load_fold_lshr2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_lshr2: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq %rsi, %rcx +; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: shrq %cl, %rax ; 
CHECK-O0-NEXT: retq @@ -1229,8 +1214,8 @@ define i64 @load_fold_ashr2(i64* %p, i64 %v2) { ; CHECK-O0-LABEL: load_fold_ashr2: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq %rsi, %rcx +; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $cl killed $rcx ; CHECK-O0-NEXT: sarq %cl, %rax ; CHECK-O0-NEXT: retq @@ -1283,17 +1268,11 @@ } define i64 @load_fold_and2(i64* %p, i64 %v2) { -; CHECK-O0-LABEL: load_fold_and2: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: andq (%rdi), %rsi -; CHECK-O0-NEXT: movq %rsi, %rax -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_fold_and2: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq %rsi, %rax -; CHECK-O3-NEXT: andq (%rdi), %rax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_fold_and2: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: andq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = and i64 %v, %v2 ret i64 %ret @@ -1336,17 +1315,11 @@ } define i64 @load_fold_or2(i64* %p, i64 %v2) { -; CHECK-O0-LABEL: load_fold_or2: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: orq (%rdi), %rsi -; CHECK-O0-NEXT: movq %rsi, %rax -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_fold_or2: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq %rsi, %rax -; CHECK-O3-NEXT: orq (%rdi), %rax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_fold_or2: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax +; CHECK-NEXT: orq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = or i64 %v, %v2 ret i64 %ret @@ -1389,17 +1362,11 @@ } define i64 @load_fold_xor2(i64* %p, i64 %v2) { -; CHECK-O0-LABEL: load_fold_xor2: -; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: xorq (%rdi), %rsi -; CHECK-O0-NEXT: movq %rsi, %rax -; CHECK-O0-NEXT: retq -; -; CHECK-O3-LABEL: load_fold_xor2: -; CHECK-O3: # %bb.0: -; CHECK-O3-NEXT: movq %rsi, %rax -; CHECK-O3-NEXT: xorq (%rdi), %rax -; CHECK-O3-NEXT: retq +; CHECK-LABEL: load_fold_xor2: +; CHECK: # %bb.0: +; CHECK-NEXT: movq %rsi, %rax +; 
CHECK-NEXT: xorq (%rdi), %rax +; CHECK-NEXT: retq %v = load atomic i64, i64* %p unordered, align 8 %ret = xor i64 %v, %v2 ret i64 %ret @@ -1434,9 +1401,7 @@ ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: subq $15, %rax -; CHECK-O0-NEXT: sete %cl -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movb %cl, %al +; CHECK-O0-NEXT: sete %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_icmp1: @@ -1454,9 +1419,7 @@ ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: subq %rsi, %rax -; CHECK-O0-NEXT: sete %cl -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movb %cl, %al +; CHECK-O0-NEXT: sete %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_fold_icmp2: @@ -1475,9 +1438,7 @@ ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq (%rsi), %rcx ; CHECK-O0-NEXT: subq %rcx, %rax -; CHECK-O0-NEXT: sete %cl -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movb %cl, %al +; CHECK-O0-NEXT: sete %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_icmp3: @@ -1620,17 +1581,17 @@ define void @rmw_fold_sdiv1(i64* %p, i64 %v) { ; CHECK-O0-LABEL: rmw_fold_sdiv1: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 -; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: imulq %rcx -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; CHECK-O0-NEXT: addq %rax, %rdx -; CHECK-O0-NEXT: movq %rdx, %rcx +; CHECK-O0-NEXT: movq (%rdi), %rcx +; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rdx # imm = 0x8888888888888889 +; CHECK-O0-NEXT: movq %rcx, %rax +; CHECK-O0-NEXT: imulq %rdx +; CHECK-O0-NEXT: movq %rdx, %rax +; CHECK-O0-NEXT: addq %rcx, %rax +; CHECK-O0-NEXT: movq %rax, %rcx ; CHECK-O0-NEXT: shrq $63, %rcx -; CHECK-O0-NEXT: sarq $3, %rdx -; CHECK-O0-NEXT: addq %rcx, %rdx -; 
CHECK-O0-NEXT: movq %rdx, (%rdi) +; CHECK-O0-NEXT: sarq $3, %rax +; CHECK-O0-NEXT: addq %rcx, %rax +; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_sdiv1: @@ -1761,16 +1722,17 @@ ; CHECK-O0-LABEL: rmw_fold_srem1: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movabsq $-8608480567731124087, %rcx # imm = 0x8888888888888889 ; CHECK-O0-NEXT: imulq %rcx ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; CHECK-O0-NEXT: addq %rax, %rdx ; CHECK-O0-NEXT: movq %rdx, %rcx -; CHECK-O0-NEXT: shrq $63, %rcx -; CHECK-O0-NEXT: sarq $3, %rdx -; CHECK-O0-NEXT: addq %rcx, %rdx -; CHECK-O0-NEXT: leaq (%rdx,%rdx,4), %rcx +; CHECK-O0-NEXT: addq %rax, %rcx +; CHECK-O0-NEXT: movq %rcx, %rdx +; CHECK-O0-NEXT: shrq $63, %rdx +; CHECK-O0-NEXT: sarq $3, %rcx +; CHECK-O0-NEXT: addq %rdx, %rcx +; CHECK-O0-NEXT: leaq (%rcx,%rcx,4), %rcx ; CHECK-O0-NEXT: leaq (%rcx,%rcx,2), %rcx ; CHECK-O0-NEXT: subq %rcx, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) @@ -1932,9 +1894,9 @@ ; CHECK-O0-LABEL: rmw_fold_shl2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi +; CHECK-O0-NEXT: movb %sil, %dl ; CHECK-O0-NEXT: # implicit-def: $rcx -; CHECK-O0-NEXT: movb %sil, %cl +; CHECK-O0-NEXT: movb %dl, %cl ; CHECK-O0-NEXT: shlxq %rcx, %rax, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq @@ -1988,9 +1950,9 @@ ; CHECK-O0-LABEL: rmw_fold_lshr2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi +; CHECK-O0-NEXT: movb %sil, %dl ; CHECK-O0-NEXT: # implicit-def: $rcx -; CHECK-O0-NEXT: movb %sil, %cl +; CHECK-O0-NEXT: movb %dl, %cl ; CHECK-O0-NEXT: shrxq %rcx, %rax, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq @@ -2044,9 +2006,9 @@ ; 
CHECK-O0-LABEL: rmw_fold_ashr2: ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: # kill: def $sil killed $sil killed $rsi +; CHECK-O0-NEXT: movb %sil, %dl ; CHECK-O0-NEXT: # implicit-def: $rcx -; CHECK-O0-NEXT: movb %sil, %cl +; CHECK-O0-NEXT: movb %dl, %cl ; CHECK-O0-NEXT: sarxq %rcx, %rax, %rax ; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq @@ -2268,12 +2230,12 @@ define i32 @split_load(i64* %p) { ; CHECK-O0-LABEL: split_load: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq (%rdi), %rax -; CHECK-O0-NEXT: movb %al, %cl -; CHECK-O0-NEXT: shrq $32, %rax -; CHECK-O0-NEXT: # kill: def $al killed $al killed $rax -; CHECK-O0-NEXT: orb %al, %cl -; CHECK-O0-NEXT: movzbl %cl, %eax +; CHECK-O0-NEXT: movq (%rdi), %rcx +; CHECK-O0-NEXT: movb %cl, %al +; CHECK-O0-NEXT: shrq $32, %rcx +; CHECK-O0-NEXT: # kill: def $cl killed $cl killed $rcx +; CHECK-O0-NEXT: orb %cl, %al +; CHECK-O0-NEXT: movzbl %al, %eax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: split_load: @@ -2411,8 +2373,8 @@ define i64 @fold_constant(i64 %arg) { ; CHECK-O0-LABEL: fold_constant: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: addq Constant, %rdi ; CHECK-O0-NEXT: movq %rdi, %rax +; CHECK-O0-NEXT: addq Constant, %rax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: fold_constant: @@ -2602,10 +2564,9 @@ define i32 @load_i16_anyext_i32(i16* %ptr) { ; CHECK-O0-CUR-LABEL: load_i16_anyext_i32: ; CHECK-O0-CUR: # %bb.0: -; CHECK-O0-CUR-NEXT: movw (%rdi), %ax -; CHECK-O0-CUR-NEXT: # implicit-def: $ecx -; CHECK-O0-CUR-NEXT: movw %ax, %cx -; CHECK-O0-CUR-NEXT: movl %ecx, %eax +; CHECK-O0-CUR-NEXT: movw (%rdi), %cx +; CHECK-O0-CUR-NEXT: # implicit-def: $eax +; CHECK-O0-CUR-NEXT: movw %cx, %ax ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_i16_anyext_i32: @@ -2633,10 +2594,10 @@ define i64 @load_i16_anyext_i64(i16* %ptr) { ; CHECK-O0-CUR-LABEL: load_i16_anyext_i64: ; CHECK-O0-CUR: # %bb.0: -; CHECK-O0-CUR-NEXT: movw (%rdi), %ax -; CHECK-O0-CUR-NEXT: # implicit-def: $ecx -; CHECK-O0-CUR-NEXT: movw 
%ax, %cx -; CHECK-O0-CUR-NEXT: vmovd %ecx, %xmm0 +; CHECK-O0-CUR-NEXT: movw (%rdi), %cx +; CHECK-O0-CUR-NEXT: # implicit-def: $eax +; CHECK-O0-CUR-NEXT: movw %cx, %ax +; CHECK-O0-CUR-NEXT: vmovd %eax, %xmm0 ; CHECK-O0-CUR-NEXT: vmovq %xmm0, %rax ; CHECK-O0-CUR-NEXT: retq ; Index: llvm/test/CodeGen/X86/atomic32.ll =================================================================== --- llvm/test/CodeGen/X86/atomic32.ll +++ llvm/test/CodeGen/X86/atomic32.ll @@ -71,9 +71,8 @@ ; X64-NEXT: andl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) ; X64-NEXT: sete %cl +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB2_2 ; X64-NEXT: jmp .LBB2_1 @@ -95,10 +94,9 @@ ; X86-NEXT: andl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 ; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: testb $1, %cl +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB2_2 ; X86-NEXT: jmp .LBB2_1 ; X86-NEXT: .LBB2_2: # %atomicrmw.end @@ -125,9 +123,8 @@ ; X64-NEXT: orl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) ; X64-NEXT: sete %cl +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB3_2 ; X64-NEXT: jmp .LBB3_1 @@ -149,10 +146,9 @@ ; X86-NEXT: orl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 ; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: 
testb $1, %cl +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB3_2 ; X86-NEXT: jmp .LBB3_1 ; X86-NEXT: .LBB3_2: # %atomicrmw.end @@ -179,9 +175,8 @@ ; X64-NEXT: xorl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) ; X64-NEXT: sete %cl +; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB4_2 ; X64-NEXT: jmp .LBB4_1 @@ -203,10 +198,9 @@ ; X86-NEXT: xorl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 ; X86-NEXT: sete %cl -; X86-NEXT: testb $1, %cl -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: testb $1, %cl +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB4_2 ; X86-NEXT: jmp .LBB4_1 ; X86-NEXT: .LBB4_2: # %atomicrmw.end @@ -223,15 +217,16 @@ define void @atomic_fetch_nand32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_nand32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB5_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload +; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl %edx, %ecx +; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: notl %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) ; X64-NEXT: sete %cl @@ -244,26 +239,27 @@ ; ; X86-LABEL: atomic_fetch_nand32: ; X86: # %bb.0: -; X86-NEXT: subl $8, %esp +; X86-NEXT: subl $12, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl sc32, %ecx ; X86-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X86-NEXT: movl sc32, %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: .LBB5_1: # %atomicrmw.start ; X86-NEXT: # =>This Inner Loop Header: Depth=1 -; X86-NEXT: movl (%esp), %eax # 4-byte Reload -; X86-NEXT: movl %eax, %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl %edx, %ecx +; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-NEXT: notl %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 ; X86-NEXT: sete %cl ; X86-NEXT: testb $1, %cl -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: jne .LBB5_2 ; X86-NEXT: jmp .LBB5_1 ; X86-NEXT: .LBB5_2: # %atomicrmw.end -; X86-NEXT: addl $8, %esp +; X86-NEXT: addl $12, %esp ; X86-NEXT: retl %t1 = atomicrmw nand i32* @sc32, i32 %x acquire ret void @@ -272,21 +268,21 @@ define void @atomic_fetch_max32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_max32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB6_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmovgl %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: cmovgl %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl 
+; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB6_2 ; X64-NEXT: jmp .LBB6_1 ; X64-NEXT: .LBB6_2: # %atomicrmw.end @@ -296,21 +292,21 @@ ; X86-CMOV: # %bb.0: ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax +; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB6_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmovgl %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: cmovgl %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB6_2 ; X86-CMOV-NEXT: jmp .LBB6_1 ; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end @@ -319,21 +315,19 @@ ; ; X86-NOCMOV-LABEL: atomic_fetch_max32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $20, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB6_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jg .LBB6_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1 @@ -341,39 +335,33 @@ ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB6_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB6_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB6_2 ; X86-NOCMOV-NEXT: jmp .LBB6_1 ; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $20, %esp -; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_max32: ; X86-NOX87: # 
%bb.0: -; X86-NOX87-NEXT: pushl %esi -; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: subl $16, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOX87-NEXT: movl sc32, %ecx ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl sc32, %eax +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB6_1: # %atomicrmw.start ; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOX87-NEXT: movl %eax, %ecx -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: subl %edx, %ecx -; X86-NOX87-NEXT: movl %eax, %esi ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jg .LBB6_4 ; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB6_1 Depth=1 @@ -381,20 +369,16 @@ ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB6_4: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB6_1 Depth=1 -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, %eax -; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOX87-NEXT: sete %cl +; X86-NOX87-NEXT: testb $1, %cl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB6_2 ; 
X86-NOX87-NEXT: jmp .LBB6_1 ; X86-NOX87-NEXT: .LBB6_2: # %atomicrmw.end -; X86-NOX87-NEXT: addl $20, %esp -; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: addl $16, %esp ; X86-NOX87-NEXT: retl %t1 = atomicrmw max i32* @sc32, i32 %x acquire ret void @@ -403,21 +387,21 @@ define void @atomic_fetch_min32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_min32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB7_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmovlel %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: cmovlel %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB7_2 ; X64-NEXT: jmp .LBB7_1 ; X64-NEXT: .LBB7_2: # %atomicrmw.end @@ -427,21 +411,21 @@ ; X86-CMOV: # %bb.0: ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax +; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB7_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-CMOV-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmovlel %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: cmovlel %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB7_2 ; X86-CMOV-NEXT: jmp .LBB7_1 ; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end @@ -450,21 +434,19 @@ ; ; X86-NOCMOV-LABEL: atomic_fetch_min32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $20, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB7_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill ; X86-NOCMOV-NEXT: jle .LBB7_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1 @@ -472,39 +454,33 @@ ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB7_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB7_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB7_2 ; X86-NOCMOV-NEXT: jmp .LBB7_1 ; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $20, %esp -; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_min32: ; X86-NOX87: # %bb.0: -; X86-NOX87-NEXT: pushl %esi -; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: subl $16, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOX87-NEXT: movl sc32, %ecx ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl sc32, %eax +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB7_1: # %atomicrmw.start ; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOX87-NEXT: movl %eax, 
%ecx -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: subl %edx, %ecx -; X86-NOX87-NEXT: movl %eax, %esi ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jle .LBB7_4 ; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB7_1 Depth=1 @@ -512,20 +488,16 @@ ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB7_4: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB7_1 Depth=1 -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, %eax -; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOX87-NEXT: sete %cl +; X86-NOX87-NEXT: testb $1, %cl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB7_2 ; X86-NOX87-NEXT: jmp .LBB7_1 ; X86-NOX87-NEXT: .LBB7_2: # %atomicrmw.end -; X86-NOX87-NEXT: addl $20, %esp -; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: addl $16, %esp ; X86-NOX87-NEXT: retl %t1 = atomicrmw min i32* @sc32, i32 %x acquire ret void @@ -534,21 +506,21 @@ define void @atomic_fetch_umax32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_umax32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB8_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl 
{{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmoval %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: cmoval %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB8_2 ; X64-NEXT: jmp .LBB8_1 ; X64-NEXT: .LBB8_2: # %atomicrmw.end @@ -558,21 +530,21 @@ ; X86-CMOV: # %bb.0: ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax +; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB8_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmoval %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: cmoval %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, (%esp) # 
4-byte Spill ; X86-CMOV-NEXT: jne .LBB8_2 ; X86-CMOV-NEXT: jmp .LBB8_1 ; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end @@ -581,21 +553,19 @@ ; ; X86-NOCMOV-LABEL: atomic_fetch_umax32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $20, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB8_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: ja .LBB8_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1 @@ -603,39 +573,33 @@ ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB8_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB8_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; 
X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB8_2 ; X86-NOCMOV-NEXT: jmp .LBB8_1 ; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $20, %esp -; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_umax32: ; X86-NOX87: # %bb.0: -; X86-NOX87-NEXT: pushl %esi -; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: subl $16, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOX87-NEXT: movl sc32, %ecx ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl sc32, %eax +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB8_1: # %atomicrmw.start ; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOX87-NEXT: movl %eax, %ecx -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: subl %edx, %ecx -; X86-NOX87-NEXT: movl %eax, %esi ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: ja .LBB8_4 ; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB8_1 Depth=1 @@ -643,20 +607,16 @@ ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB8_4: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB8_1 Depth=1 -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOX87-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, %eax -; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOX87-NEXT: sete %cl +; X86-NOX87-NEXT: testb $1, %cl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB8_2 ; X86-NOX87-NEXT: jmp .LBB8_1 ; X86-NOX87-NEXT: .LBB8_2: # %atomicrmw.end -; X86-NOX87-NEXT: addl $20, %esp -; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: addl $16, %esp ; X86-NOX87-NEXT: retl %t1 = atomicrmw umax i32* @sc32, i32 %x acquire ret void @@ -665,21 +625,21 @@ define void @atomic_fetch_umin32(i32 %x) nounwind { ; X64-LABEL: atomic_fetch_umin32: ; X64: # %bb.0: -; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: movl sc32, %eax ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: .LBB9_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload -; X64-NEXT: movl %eax, %ecx -; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload -; X64-NEXT: subl %edx, %ecx -; X64-NEXT: cmovbel %eax, %edx -; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload +; X64-NEXT: movl %eax, %edx +; X64-NEXT: subl %ecx, %edx +; X64-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; X64-NEXT: cmovbel %eax, %ecx +; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill -; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB9_2 ; X64-NEXT: jmp .LBB9_1 ; X64-NEXT: .LBB9_2: # %atomicrmw.end @@ -689,21 
+649,21 @@ ; X86-CMOV: # %bb.0: ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-CMOV-NEXT: movl sc32, %ecx ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-CMOV-NEXT: movl sc32, %eax +; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: .LBB9_1: # %atomicrmw.start ; X86-CMOV-NEXT: # =>This Inner Loop Header: Depth=1 ; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X86-CMOV-NEXT: movl %eax, %ecx -; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; X86-CMOV-NEXT: subl %edx, %ecx -; X86-CMOV-NEXT: cmovbel %eax, %edx -; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %dl -; X86-CMOV-NEXT: testb $1, %dl +; X86-CMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X86-CMOV-NEXT: movl %eax, %edx +; X86-CMOV-NEXT: subl %ecx, %edx +; X86-CMOV-NEXT: movl %edx, (%esp) # 4-byte Spill +; X86-CMOV-NEXT: cmovbel %eax, %ecx +; X86-CMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-CMOV-NEXT: sete %cl +; X86-CMOV-NEXT: testb $1, %cl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB9_2 ; X86-CMOV-NEXT: jmp .LBB9_1 ; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end @@ -712,21 +672,19 @@ ; ; X86-NOCMOV-LABEL: atomic_fetch_umin32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %esi -; X86-NOCMOV-NEXT: subl $20, %esp +; X86-NOCMOV-NEXT: subl $16, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOCMOV-NEXT: movl sc32, %ecx ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOCMOV-NEXT: movl sc32, %eax +; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB9_1: # %atomicrmw.start ; X86-NOCMOV-NEXT: # =>This Inner Loop Header: Depth=1 +; 
X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOCMOV-NEXT: movl %eax, %ecx -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: subl %edx, %ecx -; X86-NOCMOV-NEXT: movl %eax, %esi ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jbe .LBB9_4 ; X86-NOCMOV-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1 @@ -734,39 +692,33 @@ ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: .LBB9_4: # %atomicrmw.start ; X86-NOCMOV-NEXT: # in Loop: Header=BB9_1 Depth=1 -; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOCMOV-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOCMOV-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOCMOV-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOCMOV-NEXT: movl %ecx, %eax -; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %dl -; X86-NOCMOV-NEXT: testb $1, %dl +; X86-NOCMOV-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOCMOV-NEXT: sete %cl +; X86-NOCMOV-NEXT: testb $1, %cl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB9_2 ; X86-NOCMOV-NEXT: jmp .LBB9_1 ; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end -; X86-NOCMOV-NEXT: addl $20, %esp -; X86-NOCMOV-NEXT: popl %esi +; X86-NOCMOV-NEXT: addl $16, %esp ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_umin32: ; X86-NOX87: # %bb.0: -; X86-NOX87-NEXT: pushl %esi -; X86-NOX87-NEXT: subl $20, %esp +; X86-NOX87-NEXT: subl $16, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOX87-NEXT: movl sc32, %ecx ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
-; X86-NOX87-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NOX87-NEXT: movl sc32, %eax +; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB9_1: # %atomicrmw.start ; X86-NOX87-NEXT: # =>This Inner Loop Header: Depth=1 +; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NOX87-NEXT: movl %eax, %ecx -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X86-NOX87-NEXT: subl %edx, %ecx -; X86-NOX87-NEXT: movl %eax, %esi ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X86-NOX87-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jbe .LBB9_4 ; X86-NOX87-NEXT: # %bb.3: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB9_1 Depth=1 @@ -774,20 +726,16 @@ ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: .LBB9_4: # %atomicrmw.start ; X86-NOX87-NEXT: # in Loop: Header=BB9_1 Depth=1 -; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NOX87-NEXT: movl (%esp), %eax # 4-byte Reload ; X86-NOX87-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X86-NOX87-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NOX87-NEXT: movl %ecx, %eax -; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload -; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %dl -; X86-NOX87-NEXT: testb $1, %dl +; X86-NOX87-NEXT: lock cmpxchgl %ecx, sc32 +; X86-NOX87-NEXT: sete %cl +; X86-NOX87-NEXT: testb $1, %cl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB9_2 ; X86-NOX87-NEXT: jmp .LBB9_1 ; X86-NOX87-NEXT: .LBB9_2: # %atomicrmw.end -; X86-NOX87-NEXT: addl $20, %esp -; X86-NOX87-NEXT: popl %esi +; X86-NOX87-NEXT: addl $16, %esp ; X86-NOX87-NEXT: retl %t1 = atomicrmw umin i32* @sc32, i32 %x acquire ret void Index: 
llvm/test/CodeGen/X86/atomic64.ll =================================================================== --- llvm/test/CodeGen/X86/atomic64.ll +++ llvm/test/CodeGen/X86/atomic64.ll @@ -17,46 +17,37 @@ ; ; I486-LABEL: atomic_fetch_add64: ; I486: # %bb.0: # %entry -; I486-NEXT: pushl %esi -; I486-NEXT: subl $48, %esp +; I486-NEXT: subl $16, %esp ; I486-NEXT: leal sc64, %eax -; I486-NEXT: movl %esp, %ecx -; I486-NEXT: movl $2, 12(%ecx) -; I486-NEXT: movl $0, 8(%ecx) -; I486-NEXT: movl $1, 4(%ecx) -; I486-NEXT: movl $sc64, (%ecx) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $1, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_add_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $3, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $3, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_add_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $5, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $5, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll 
__atomic_fetch_add_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %eax, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, %ecx +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_add_8 -; I486-NEXT: addl $48, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl entry: %t1 = atomicrmw add i64* @sc64, i64 1 acquire @@ -78,46 +69,37 @@ ; ; I486-LABEL: atomic_fetch_sub64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $48, %esp +; I486-NEXT: subl $16, %esp ; I486-NEXT: leal sc64, %eax -; I486-NEXT: movl %esp, %ecx -; I486-NEXT: movl $2, 12(%ecx) -; I486-NEXT: movl $0, 8(%ecx) -; I486-NEXT: movl $1, 4(%ecx) -; I486-NEXT: movl $sc64, (%ecx) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $1, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_sub_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $3, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $3, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_sub_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; 
I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $5, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $5, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_sub_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %eax, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, %ecx +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_sub_8 -; I486-NEXT: addl $48, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw sub i64* @sc64, i64 1 acquire %t2 = atomicrmw sub i64* @sc64, i64 3 acquire @@ -140,9 +122,8 @@ ; X64-NEXT: # kill: def $rcx killed $ecx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) ; X64-NEXT: sete %cl +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB2_2 ; X64-NEXT: jmp .LBB2_1 @@ -153,36 +134,30 @@ ; ; I486-LABEL: atomic_fetch_and64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $36, %esp +; I486-NEXT: subl $16, %esp ; I486-NEXT: leal sc64, %eax -; I486-NEXT: movl %esp, %ecx -; I486-NEXT: movl $2, 12(%ecx) -; I486-NEXT: movl $0, 8(%ecx) -; I486-NEXT: movl $3, 4(%ecx) -; I486-NEXT: movl $sc64, (%ecx) -; 
I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $3, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_and_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $5, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $5, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_and_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %eax, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, %ecx +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_and_8 -; I486-NEXT: addl $36, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw and i64* @sc64, i64 3 acquire %t2 = atomicrmw and i64* @sc64, i64 5 acquire @@ -203,9 +178,8 @@ ; X64-NEXT: orq $5, %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) ; X64-NEXT: sete %cl +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB3_2 ; X64-NEXT: jmp .LBB3_1 @@ -216,36 
+190,30 @@ ; ; I486-LABEL: atomic_fetch_or64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $36, %esp +; I486-NEXT: subl $16, %esp ; I486-NEXT: leal sc64, %eax -; I486-NEXT: movl %esp, %ecx -; I486-NEXT: movl $2, 12(%ecx) -; I486-NEXT: movl $0, 8(%ecx) -; I486-NEXT: movl $3, 4(%ecx) -; I486-NEXT: movl $sc64, (%ecx) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $3, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_or_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $5, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $5, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_or_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %eax, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, %ecx +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_or_8 -; I486-NEXT: addl $36, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw or i64* @sc64, i64 3 acquire %t2 = atomicrmw or i64* @sc64, i64 5 acquire @@ -266,9 +234,8 @@ ; X64-NEXT: xorq $5, %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) ; 
X64-NEXT: sete %cl +; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: testb $1, %cl -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB4_2 ; X64-NEXT: jmp .LBB4_1 @@ -279,36 +246,30 @@ ; ; I486-LABEL: atomic_fetch_xor64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $36, %esp +; I486-NEXT: subl $16, %esp ; I486-NEXT: leal sc64, %eax -; I486-NEXT: movl %esp, %ecx -; I486-NEXT: movl $2, 12(%ecx) -; I486-NEXT: movl $0, 8(%ecx) -; I486-NEXT: movl $3, 4(%ecx) -; I486-NEXT: movl $sc64, (%ecx) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $3, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_xor_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $0, 8(%esi) -; I486-NEXT: movl $5, 4(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $0, 8(%eax) +; I486-NEXT: movl $5, 4(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_xor_8 -; I486-NEXT: leal sc64, %ecx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %eax, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, %ecx +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, 
(%eax) ; I486-NEXT: calll __atomic_fetch_xor_8 -; I486-NEXT: addl $36, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw xor i64* @sc64, i64 3 acquire %t2 = atomicrmw xor i64* @sc64, i64 5 acquire @@ -319,15 +280,16 @@ define void @atomic_fetch_nand64(i64 %x) nounwind { ; X64-LABEL: atomic_fetch_nand64: ; X64: # %bb.0: -; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: .LBB5_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; X64-NEXT: movq %rax, %rcx ; X64-NEXT: andq %rdx, %rcx +; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: notq %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) ; X64-NEXT: sete %cl @@ -340,20 +302,17 @@ ; ; I486-LABEL: atomic_fetch_nand64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $20, %esp -; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: subl $16, %esp +; I486-NEXT: movl {{[0-9]+}}(%esp), %edx ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: leal sc64, %edx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %eax, 8(%esi) -; I486-NEXT: movl %ecx, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_fetch_nand_8 -; I486-NEXT: addl $20, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw nand i64* @sc64, i64 %x acquire ret void @@ -362,21 +321,21 @@ define void @atomic_fetch_max64(i64 %x) 
nounwind { ; X64-LABEL: atomic_fetch_max64: ; X64: # %bb.0: -; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: .LBB6_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: cmovgq %rax, %rdx -; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: subq %rcx, %rdx +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: cmovgq %rax, %rcx +; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB6_2 ; X64-NEXT: jmp .LBB6_1 ; X64-NEXT: .LBB6_2: # %atomicrmw.end @@ -386,70 +345,65 @@ ; I486: # %bb.0: ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp -; I486-NEXT: pushl %ebx -; I486-NEXT: pushl %edi ; I486-NEXT: pushl %esi ; I486-NEXT: andl $-8, %esp ; I486-NEXT: subl $72, %esp ; I486-NEXT: movl 12(%ebp), %eax -; I486-NEXT: movl 8(%ebp), %ecx -; I486-NEXT: movl sc64+4, %edx -; I486-NEXT: movl sc64, %esi ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl 8(%ebp), %eax +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl sc64+4, %eax +; I486-NEXT: movl sc64, %ecx ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill ; I486-NEXT: jmp .LBB6_1 ; I486-NEXT: .LBB6_1: # %atomicrmw.start ; I486-NEXT: # =>This Inner Loop Header: Depth=1 -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: subl %ecx, %edx ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; I486-NEXT: sbbl %eax, %esi -; I486-NEXT: movl %ecx, %edi -; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: subl %ecx, %esi +; I486-NEXT: sbbl %eax, %edx +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jl .LBB6_4 ; I486-NEXT: # %bb.3: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB6_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: .LBB6_4: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB6_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl %esi, 
{{[0-9]+}}(%esp) -; I486-NEXT: movl %esp, %edi -; I486-NEXT: movl %eax, 12(%edi) -; I486-NEXT: movl %ecx, 8(%edi) -; I486-NEXT: leal {{[0-9]+}}(%esp), %eax -; I486-NEXT: movl %eax, 4(%edi) -; I486-NEXT: movl $2, 20(%edi) -; I486-NEXT: movl $2, 16(%edi) -; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: movl %eax, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 12(%eax) +; I486-NEXT: movl %ecx, 8(%eax) +; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 20(%eax) +; I486-NEXT: movl $2, 16(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movb %al, %dl ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: movl {{[0-9]+}}(%esp), %edx -; I486-NEXT: testb %al, %al +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: testb %dl, %dl ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: je .LBB6_1 ; I486-NEXT: jmp .LBB6_2 ; I486-NEXT: .LBB6_2: # %atomicrmw.end -; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: leal -4(%ebp), %esp ; I486-NEXT: popl %esi -; I486-NEXT: popl %edi -; I486-NEXT: popl %ebx ; I486-NEXT: popl %ebp ; I486-NEXT: retl %t1 = atomicrmw max i64* @sc64, i64 %x acquire @@ -460,21 +414,21 @@ define void @atomic_fetch_min64(i64 %x) nounwind { ; X64-LABEL: atomic_fetch_min64: ; X64: # %bb.0: -; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: .LBB7_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: cmovleq %rax, %rdx -; X64-NEXT: lock 
cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: subq %rcx, %rdx +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: cmovleq %rax, %rcx +; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB7_2 ; X64-NEXT: jmp .LBB7_1 ; X64-NEXT: .LBB7_2: # %atomicrmw.end @@ -484,70 +438,65 @@ ; I486: # %bb.0: ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp -; I486-NEXT: pushl %ebx -; I486-NEXT: pushl %edi ; I486-NEXT: pushl %esi ; I486-NEXT: andl $-8, %esp ; I486-NEXT: subl $72, %esp ; I486-NEXT: movl 12(%ebp), %eax -; I486-NEXT: movl 8(%ebp), %ecx -; I486-NEXT: movl sc64+4, %edx -; I486-NEXT: movl sc64, %esi ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl 8(%ebp), %eax +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl sc64+4, %eax +; I486-NEXT: movl sc64, %ecx ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jmp .LBB7_1 ; I486-NEXT: .LBB7_1: # %atomicrmw.start ; I486-NEXT: # =>This Inner Loop Header: Depth=1 -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: subl %ecx, %edx ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; I486-NEXT: sbbl %eax, %esi -; I486-NEXT: movl %ecx, %edi -; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 
+; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: subl %ecx, %esi +; I486-NEXT: sbbl %eax, %edx +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jge .LBB7_4 ; I486-NEXT: # %bb.3: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB7_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: .LBB7_4: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB7_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I486-NEXT: movl %esp, %edi -; I486-NEXT: movl %eax, 12(%edi) -; I486-NEXT: movl %ecx, 8(%edi) -; I486-NEXT: leal {{[0-9]+}}(%esp), %eax -; I486-NEXT: movl %eax, 4(%edi) -; I486-NEXT: movl $2, 20(%edi) -; I486-NEXT: movl $2, 16(%edi) -; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: movl %eax, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 12(%eax) +; I486-NEXT: movl %ecx, 8(%eax) +; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 20(%eax) +; I486-NEXT: movl $2, 16(%eax) +; 
I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movb %al, %dl ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: movl {{[0-9]+}}(%esp), %edx -; I486-NEXT: testb %al, %al +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: testb %dl, %dl ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: je .LBB7_1 ; I486-NEXT: jmp .LBB7_2 ; I486-NEXT: .LBB7_2: # %atomicrmw.end -; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: leal -4(%ebp), %esp ; I486-NEXT: popl %esi -; I486-NEXT: popl %edi -; I486-NEXT: popl %ebx ; I486-NEXT: popl %ebp ; I486-NEXT: retl %t1 = atomicrmw min i64* @sc64, i64 %x acquire @@ -558,21 +507,21 @@ define void @atomic_fetch_umax64(i64 %x) nounwind { ; X64-LABEL: atomic_fetch_umax64: ; X64: # %bb.0: -; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: .LBB8_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: cmovaq %rax, %rdx -; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: subq %rcx, %rdx +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: cmovaq %rax, %rcx +; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB8_2 ; X64-NEXT: jmp .LBB8_1 ; 
X64-NEXT: .LBB8_2: # %atomicrmw.end @@ -582,70 +531,65 @@ ; I486: # %bb.0: ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp -; I486-NEXT: pushl %ebx -; I486-NEXT: pushl %edi ; I486-NEXT: pushl %esi ; I486-NEXT: andl $-8, %esp ; I486-NEXT: subl $72, %esp ; I486-NEXT: movl 12(%ebp), %eax -; I486-NEXT: movl 8(%ebp), %ecx -; I486-NEXT: movl sc64+4, %edx -; I486-NEXT: movl sc64, %esi ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl 8(%ebp), %eax +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl sc64+4, %eax +; I486-NEXT: movl sc64, %ecx ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jmp .LBB8_1 ; I486-NEXT: .LBB8_1: # %atomicrmw.start ; I486-NEXT: # =>This Inner Loop Header: Depth=1 -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: subl %ecx, %edx ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; I486-NEXT: sbbl %eax, %esi -; I486-NEXT: movl %ecx, %edi -; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: subl %ecx, %esi +; I486-NEXT: sbbl %eax, %edx +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jb .LBB8_4 ; I486-NEXT: # %bb.3: # 
%atomicrmw.start ; I486-NEXT: # in Loop: Header=BB8_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: .LBB8_4: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB8_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I486-NEXT: movl %esp, %edi -; I486-NEXT: movl %eax, 12(%edi) -; I486-NEXT: movl %ecx, 8(%edi) -; I486-NEXT: leal {{[0-9]+}}(%esp), %eax -; I486-NEXT: movl %eax, 4(%edi) -; I486-NEXT: movl $2, 20(%edi) -; I486-NEXT: movl $2, 16(%edi) -; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: movl %eax, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 12(%eax) +; I486-NEXT: movl %ecx, 8(%eax) +; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 20(%eax) +; I486-NEXT: movl $2, 16(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movb %al, %dl ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: movl {{[0-9]+}}(%esp), %edx -; I486-NEXT: testb %al, %al +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: testb %dl, %dl ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: je .LBB8_1 ; I486-NEXT: jmp .LBB8_2 ; I486-NEXT: .LBB8_2: # 
%atomicrmw.end -; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: leal -4(%ebp), %esp ; I486-NEXT: popl %esi -; I486-NEXT: popl %edi -; I486-NEXT: popl %ebx ; I486-NEXT: popl %ebp ; I486-NEXT: retl %t1 = atomicrmw umax i64* @sc64, i64 %x acquire @@ -656,21 +600,21 @@ define void @atomic_fetch_umin64(i64 %x) nounwind { ; X64-LABEL: atomic_fetch_umin64: ; X64: # %bb.0: -; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: movq sc64, %rax ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: .LBB9_1: # %atomicrmw.start ; X64-NEXT: # =>This Inner Loop Header: Depth=1 ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; X64-NEXT: subq %rdx, %rcx -; X64-NEXT: cmovbeq %rax, %rdx -; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; X64-NEXT: movq %rax, %rdx +; X64-NEXT: subq %rcx, %rdx +; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: cmovbeq %rax, %rcx +; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB9_2 ; X64-NEXT: jmp .LBB9_1 ; X64-NEXT: .LBB9_2: # %atomicrmw.end @@ -680,70 +624,65 @@ ; I486: # %bb.0: ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp -; I486-NEXT: pushl %ebx -; I486-NEXT: pushl %edi ; I486-NEXT: pushl %esi ; I486-NEXT: andl $-8, %esp ; I486-NEXT: subl $72, %esp ; I486-NEXT: movl 12(%ebp), %eax -; I486-NEXT: movl 8(%ebp), %ecx -; I486-NEXT: movl sc64+4, %edx -; I486-NEXT: movl sc64, %esi ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl 8(%ebp), %eax +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 
I486-NEXT: movl sc64+4, %eax +; I486-NEXT: movl sc64, %ecx ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jmp .LBB9_1 ; I486-NEXT: .LBB9_1: # %atomicrmw.start ; I486-NEXT: # =>This Inner Loop Header: Depth=1 -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: subl %ecx, %edx ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; I486-NEXT: sbbl %eax, %esi -; I486-NEXT: movl %ecx, %edi -; I486-NEXT: movl %eax, %ebx +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: subl %ecx, %esi +; I486-NEXT: sbbl %eax, %edx +; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: jae .LBB9_4 ; I486-NEXT: # %bb.3: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB9_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: .LBB9_4: # %atomicrmw.start ; I486-NEXT: # in Loop: Header=BB9_1 Depth=1 ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; I486-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; I486-NEXT: movl %edx, {{[0-9]+}}(%esp) -; I486-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; I486-NEXT: movl %esi, {{[0-9]+}}(%esp) -; I486-NEXT: movl %esp, %edi -; I486-NEXT: movl %eax, 12(%edi) -; I486-NEXT: movl %ecx, 8(%edi) -; I486-NEXT: leal {{[0-9]+}}(%esp), %eax -; I486-NEXT: movl %eax, 4(%edi) -; I486-NEXT: movl $2, 20(%edi) -; I486-NEXT: movl $2, 16(%edi) -; I486-NEXT: movl $sc64, (%edi) +; I486-NEXT: movl %eax, {{[0-9]+}}(%esp) +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 12(%eax) +; I486-NEXT: movl %ecx, 8(%eax) +; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 20(%eax) +; I486-NEXT: movl $2, 16(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_compare_exchange_8 +; I486-NEXT: movb %al, %dl ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: movl {{[0-9]+}}(%esp), %edx -; I486-NEXT: testb %al, %al +; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: testb %dl, %dl ; I486-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; I486-NEXT: je .LBB9_1 ; I486-NEXT: jmp .LBB9_2 ; I486-NEXT: .LBB9_2: # %atomicrmw.end -; I486-NEXT: leal -12(%ebp), %esp +; I486-NEXT: leal -4(%ebp), %esp ; I486-NEXT: popl %esi -; I486-NEXT: popl %edi -; I486-NEXT: popl %ebx ; I486-NEXT: popl %ebp ; I486-NEXT: retl %t1 = atomicrmw umin i64* @sc64, i64 %x acquire @@ -765,19 +704,18 @@ ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp ; I486-NEXT: andl $-8, %esp -; I486-NEXT: subl $40, %esp +; I486-NEXT: subl $32, %esp ; I486-NEXT: leal sc64, %eax ; I486-NEXT: leal {{[0-9]+}}(%esp), %ecx ; I486-NEXT: movl $0, {{[0-9]+}}(%esp) ; I486-NEXT: movl $0, {{[0-9]+}}(%esp) -; I486-NEXT: 
movl %esp, %edx -; I486-NEXT: movl %ecx, 4(%edx) -; I486-NEXT: movl $2, 20(%edx) -; I486-NEXT: movl $2, 16(%edx) -; I486-NEXT: movl $0, 12(%edx) -; I486-NEXT: movl $1, 8(%edx) -; I486-NEXT: movl $sc64, (%edx) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 20(%eax) +; I486-NEXT: movl $2, 16(%eax) +; I486-NEXT: movl $0, 12(%eax) +; I486-NEXT: movl $1, 8(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_compare_exchange_8 ; I486-NEXT: movl %ebp, %esp ; I486-NEXT: popl %ebp @@ -794,20 +732,17 @@ ; ; I486-LABEL: atomic_fetch_store64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $20, %esp -; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: subl $16, %esp +; I486-NEXT: movl {{[0-9]+}}(%esp), %edx ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: leal sc64, %edx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %eax, 8(%esi) -; I486-NEXT: movl %ecx, 4(%esi) -; I486-NEXT: movl $3, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $3, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_store_8 -; I486-NEXT: addl $20, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl store atomic i64 %x, i64* @sc64 release, align 8 ret void @@ -821,20 +756,17 @@ ; ; I486-LABEL: atomic_fetch_swap64: ; I486: # %bb.0: -; I486-NEXT: pushl %esi -; I486-NEXT: subl $20, %esp -; I486-NEXT: movl {{[0-9]+}}(%esp), %eax +; I486-NEXT: subl $16, %esp +; I486-NEXT: movl {{[0-9]+}}(%esp), %edx ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx -; I486-NEXT: leal sc64, %edx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %eax, 8(%esi) -; I486-NEXT: movl %ecx, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $sc64, (%esi) -; 
I486-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: leal sc64, %eax +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $sc64, (%eax) ; I486-NEXT: calll __atomic_exchange_8 -; I486-NEXT: addl $20, %esp -; I486-NEXT: popl %esi +; I486-NEXT: addl $16, %esp ; I486-NEXT: retl %t1 = atomicrmw xchg i64* @sc64, i64 %x acquire ret void @@ -851,23 +783,20 @@ ; I486: # %bb.0: ; I486-NEXT: pushl %ebp ; I486-NEXT: movl %esp, %ebp -; I486-NEXT: pushl %esi ; I486-NEXT: andl $-8, %esp -; I486-NEXT: subl $40, %esp +; I486-NEXT: subl $24, %esp ; I486-NEXT: fldl 8(%ebp) ; I486-NEXT: leal fsc64, %eax ; I486-NEXT: fstpl {{[0-9]+}}(%esp) ; I486-NEXT: movl {{[0-9]+}}(%esp), %ecx ; I486-NEXT: movl {{[0-9]+}}(%esp), %edx -; I486-NEXT: movl %esp, %esi -; I486-NEXT: movl %edx, 8(%esi) -; I486-NEXT: movl %ecx, 4(%esi) -; I486-NEXT: movl $2, 12(%esi) -; I486-NEXT: movl $fsc64, (%esi) -; I486-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; I486-NEXT: movl %esp, %eax +; I486-NEXT: movl %edx, 8(%eax) +; I486-NEXT: movl %ecx, 4(%eax) +; I486-NEXT: movl $2, 12(%eax) +; I486-NEXT: movl $fsc64, (%eax) ; I486-NEXT: calll __atomic_exchange_8 -; I486-NEXT: leal -4(%ebp), %esp -; I486-NEXT: popl %esi +; I486-NEXT: movl %ebp, %esp ; I486-NEXT: popl %ebp ; I486-NEXT: retl %t1 = atomicrmw xchg double* @fsc64, double %x acquire Index: llvm/test/CodeGen/X86/atomic6432.ll =================================================================== --- llvm/test/CodeGen/X86/atomic6432.ll +++ llvm/test/CodeGen/X86/atomic6432.ll @@ -7,106 +7,98 @@ ; X32-LABEL: atomic_fetch_add64: ; X32: # %bb.0: # %entry ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $56, %esp -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: subl $72, %esp +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: 
movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB0_1 ; X32-NEXT: .LBB0_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $1, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $1, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB0_1 ; X32-NEXT: jmp .LBB0_2 ; X32-NEXT: .LBB0_2: # %atomicrmw.end -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB0_3 ; X32-NEXT: .LBB0_3: # %atomicrmw.start2 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $3, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $3, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB0_3 ; X32-NEXT: jmp .LBB0_4 ; X32-NEXT: .LBB0_4: # %atomicrmw.end1 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB0_5 ; X32-NEXT: .LBB0_5: # %atomicrmw.start8 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $5, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $0, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $5, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $0, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB0_5 ; X32-NEXT: jmp .LBB0_6 ; X32-NEXT: .LBB0_6: # %atomicrmw.end7 
-; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB0_7 ; X32-NEXT: .LBB0_7: # %atomicrmw.start14 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: addl %esi, %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: adcl %ebx, %edi -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl %ecx, %ebx +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl %esi, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB0_7 ; X32-NEXT: jmp .LBB0_8 ; X32-NEXT: .LBB0_8: # %atomicrmw.end13 -; X32-NEXT: addl $56, %esp +; X32-NEXT: addl $72, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl entry: @@ -121,106 +113,98 @@ ; X32-LABEL: atomic_fetch_sub64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $56, %esp -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: subl $72, %esp +; 
X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB1_1 ; X32-NEXT: .LBB1_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $-1, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $-1, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $-1, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $-1, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB1_1 ; X32-NEXT: jmp .LBB1_2 ; X32-NEXT: .LBB1_2: # %atomicrmw.end -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB1_3 ; X32-NEXT: .LBB1_3: # %atomicrmw.start2 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $-3, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $-1, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $-3, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx 
+; X32-NEXT: adcl $-1, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB1_3 ; X32-NEXT: jmp .LBB1_4 ; X32-NEXT: .LBB1_4: # %atomicrmw.end1 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB1_5 ; X32-NEXT: .LBB1_5: # %atomicrmw.start8 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: addl $-5, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: adcl $-1, %esi +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: addl $-5, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: adcl $-1, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB1_5 ; 
X32-NEXT: jmp .LBB1_6 ; X32-NEXT: .LBB1_6: # %atomicrmw.end7 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB1_7 ; X32-NEXT: .LBB1_7: # %atomicrmw.start14 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: subl %esi, %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: sbbl %ebx, %edi -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: subl %ecx, %ebx +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: sbbl %esi, %ecx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB1_7 ; X32-NEXT: jmp .LBB1_8 ; X32-NEXT: .LBB1_8: # %atomicrmw.end13 -; X32-NEXT: addl $56, %esp +; X32-NEXT: addl $72, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw sub i64* @sc64, i64 1 acquire @@ -234,83 +218,75 @@ ; X32-LABEL: atomic_fetch_and64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $44, %esp -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx 
-; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: subl $52, %esp +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB2_1 ; X32-NEXT: .LBB2_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: andl $3, %ecx -; X32-NEXT: xorl %esi, %esi -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl $3, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB2_1 ; X32-NEXT: jmp .LBB2_2 ; X32-NEXT: .LBB2_2: # %atomicrmw.end -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB2_3 ; X32-NEXT: .LBB2_3: # %atomicrmw.start2 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl $1, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: andl $1, %ecx -; X32-NEXT: movl %edx, 
%esi -; X32-NEXT: andl $1, %esi ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB2_3 ; X32-NEXT: jmp .LBB2_4 ; X32-NEXT: .LBB2_4: # %atomicrmw.end1 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB2_5 ; X32-NEXT: .LBB2_5: # %atomicrmw.start8 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl %ecx, %ebx +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: andl %esi, %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: andl %ebx, %edi -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB2_5 ; X32-NEXT: jmp .LBB2_6 ; X32-NEXT: .LBB2_6: # %atomicrmw.end7 -; X32-NEXT: addl $44, %esp +; X32-NEXT: addl $52, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw and i64* @sc64, i64 3 acquire @@ -323,84 +299,75 @@ ; X32-LABEL: atomic_fetch_or64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $48, %esp -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: subl $52, %esp +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB3_1 ; X32-NEXT: .LBB3_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: orl $3, %ecx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: orl $3, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %edx ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB3_1 ; 
X32-NEXT: jmp .LBB3_2 ; X32-NEXT: .LBB3_2: # %atomicrmw.end -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB3_3 ; X32-NEXT: .LBB3_3: # %atomicrmw.start2 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: orl $1, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: orl $1, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: orl $1, %esi ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB3_3 ; X32-NEXT: jmp .LBB3_4 ; X32-NEXT: .LBB3_4: # %atomicrmw.end1 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB3_5 ; X32-NEXT: .LBB3_5: # %atomicrmw.start8 ; X32-NEXT: # =>This Inner Loop 
Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: orl %ecx, %ebx +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: orl %esi, %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: orl %ebx, %edi -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB3_5 ; X32-NEXT: jmp .LBB3_6 ; X32-NEXT: .LBB3_6: # %atomicrmw.end7 -; X32-NEXT: addl $48, %esp +; X32-NEXT: addl $52, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw or i64* @sc64, i64 3 acquire @@ -413,84 +380,75 @@ ; X32-LABEL: atomic_fetch_xor64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $48, %esp -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: subl $52, %esp +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB4_1 ; X32-NEXT: .LBB4_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 
4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: xorl $3, %ecx -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: xorl $3, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %ecx, %edx ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB4_1 ; X32-NEXT: jmp .LBB4_2 ; X32-NEXT: .LBB4_2: # %atomicrmw.end -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB4_3 ; X32-NEXT: .LBB4_3: # %atomicrmw.start2 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: xorl $1, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: xorl $1, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: xorl $1, %esi ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: movl %edx, %esi -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB4_3 ; X32-NEXT: jmp .LBB4_4 ; X32-NEXT: .LBB4_4: # %atomicrmw.end1 -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %ecx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB4_5 ; X32-NEXT: .LBB4_5: # %atomicrmw.start8 ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %eax, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: xorl %ecx, %ebx +; X32-NEXT: movl %ebx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, %ecx ; X32-NEXT: xorl %esi, %ecx -; X32-NEXT: movl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: xorl %ebx, %edi -; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx -; X32-NEXT: movl (%esp), %ebx # 4-byte Reload +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB4_5 ; X32-NEXT: jmp .LBB4_6 ; X32-NEXT: .LBB4_6: # %atomicrmw.end7 -; X32-NEXT: addl $48, %esp +; X32-NEXT: addl $52, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl 
%edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw xor i64* @sc64, i64 3 acquire @@ -505,36 +463,39 @@ ; X32-NEXT: pushl %ebx ; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $16, %esp +; X32-NEXT: subl $32, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl sc64+4, %edx -; X32-NEXT: movl sc64, %esi +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB5_1 ; X32-NEXT: .LBB5_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl %edx, %ecx ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; X32-NEXT: andl %esi, %ecx -; X32-NEXT: movl %eax, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: andl %ebx, %edi -; X32-NEXT: notl %edi +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; X32-NEXT: movl %edx, %ecx +; X32-NEXT: andl %edi, %ecx +; X32-NEXT: movl %ecx, (%esp) # 4-byte Spill +; X32-NEXT: movl %eax, %ebx +; X32-NEXT: andl %esi, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: notl %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: notl %ecx -; X32-NEXT: movl %edi, %ebx +; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, (%esp) # 4-byte 
Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB5_1 ; X32-NEXT: jmp .LBB5_2 ; X32-NEXT: .LBB5_2: # %atomicrmw.end -; X32-NEXT: addl $16, %esp +; X32-NEXT: addl $32, %esp ; X32-NEXT: popl %esi ; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx @@ -547,42 +508,41 @@ ; X32-LABEL: atomic_fetch_max64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $24, %esp +; X32-NEXT: subl $32, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl sc64+4, %edx -; X32-NEXT: movl sc64, %esi +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB6_1 ; X32-NEXT: .LBB6_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: subl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, %esi +; X32-NEXT: subl %eax, %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %esi ; X32-NEXT: sbbl %edx, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: cmovll %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: cmovll %eax, %ebx +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 
4-byte Spill +; X32-NEXT: cmovll %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx +; X32-NEXT: cmovll %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB6_1 ; X32-NEXT: jmp .LBB6_2 ; X32-NEXT: .LBB6_2: # %atomicrmw.end -; X32-NEXT: addl $24, %esp +; X32-NEXT: addl $32, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw max i64* @sc64, i64 %x acquire @@ -593,42 +553,41 @@ ; X32-LABEL: atomic_fetch_min64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $24, %esp +; X32-NEXT: subl $32, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl sc64+4, %edx -; X32-NEXT: movl sc64, %esi +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB7_1 ; X32-NEXT: .LBB7_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: subl %eax, %ecx -; X32-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, %esi +; X32-NEXT: subl %eax, %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %esi ; X32-NEXT: sbbl %edx, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: cmovgel %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: cmovgel %eax, %ebx +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: cmovgel %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx +; X32-NEXT: cmovgel %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB7_1 ; X32-NEXT: jmp .LBB7_2 ; X32-NEXT: .LBB7_2: # %atomicrmw.end -; X32-NEXT: addl $24, %esp +; X32-NEXT: addl $32, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw min i64* @sc64, i64 %x acquire @@ -639,42 +598,41 @@ ; X32-LABEL: atomic_fetch_umax64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $24, %esp +; X32-NEXT: subl $32, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl sc64+4, %edx -; X32-NEXT: movl sc64, %esi +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB8_1 ; X32-NEXT: .LBB8_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: subl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, %esi +; X32-NEXT: subl %eax, %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %esi ; X32-NEXT: sbbl %edx, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: cmovbl %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: cmovbl %eax, %ebx +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: cmovbl %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx +; X32-NEXT: cmovbl %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB8_1 ; X32-NEXT: jmp .LBB8_2 ; X32-NEXT: .LBB8_2: # %atomicrmw.end -; X32-NEXT: addl $24, %esp +; X32-NEXT: addl $32, %esp ; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw umax i64* @sc64, i64 %x acquire @@ -685,42 +643,41 @@ ; X32-LABEL: atomic_fetch_umin64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %edi ; X32-NEXT: pushl %esi -; X32-NEXT: subl $24, %esp +; X32-NEXT: subl $32, %esp ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NEXT: movl %eax, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl sc64+4, %edx -; X32-NEXT: movl sc64, %esi +; X32-NEXT: movl sc64, %eax ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB9_1 ; X32-NEXT: .LBB9_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; X32-NEXT: subl %eax, %ecx -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; X32-NEXT: movl %ebx, %esi +; X32-NEXT: subl %eax, %esi +; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %ecx, %esi ; X32-NEXT: sbbl %edx, %esi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; X32-NEXT: cmovael %edx, %edi -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; X32-NEXT: cmovael %eax, %ebx +; X32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: cmovael %edx, %ecx ; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edi, %ecx +; X32-NEXT: cmovael %eax, %ebx +; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %esi, (%esp) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB9_1 ; X32-NEXT: jmp .LBB9_2 ; X32-NEXT: .LBB9_2: # %atomicrmw.end -; X32-NEXT: addl $24, %esp +; X32-NEXT: addl $32, %esp 
; X32-NEXT: popl %esi -; X32-NEXT: popl %edi ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = atomicrmw umin i64* @sc64, i64 %x acquire @@ -731,14 +688,11 @@ ; X32-LABEL: atomic_fetch_cmpxchg64: ; X32: # %bb.0: ; X32-NEXT: pushl %ebx -; X32-NEXT: pushl %eax -; X32-NEXT: xorl %eax, %eax +; X32-NEXT: xorl %ecx, %ecx ; X32-NEXT: movl $1, %ebx -; X32-NEXT: movl %eax, (%esp) # 4-byte Spill -; X32-NEXT: movl (%esp), %edx # 4-byte Reload -; X32-NEXT: movl (%esp), %ecx # 4-byte Reload +; X32-NEXT: movl %ecx, %eax +; X32-NEXT: movl %ecx, %edx ; X32-NEXT: lock cmpxchg8b sc64 -; X32-NEXT: addl $4, %esp ; X32-NEXT: popl %ebx ; X32-NEXT: retl %t1 = cmpxchg i64* @sc64, i64 0, i64 1 acquire acquire @@ -763,24 +717,24 @@ ; X32: # %bb.0: ; X32-NEXT: pushl %ebx ; X32-NEXT: subl $16, %esp -; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X32-NEXT: movl sc64+4, %eax -; X32-NEXT: movl sc64, %edx -; X32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; X32-NEXT: movl %eax, (%esp) # 4-byte Spill +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl sc64+4, %edx +; X32-NEXT: movl sc64, %eax +; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jmp .LBB12_1 ; X32-NEXT: .LBB12_1: # %atomicrmw.start ; X32-NEXT: # =>This Inner Loop Header: Depth=1 -; X32-NEXT: movl (%esp), %edx # 4-byte Reload +; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; X32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X32-NEXT: movl (%esp), %ecx # 4-byte Reload ; X32-NEXT: lock cmpxchg8b sc64 -; 
X32-NEXT: movl %edx, (%esp) # 4-byte Spill ; X32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X32-NEXT: jne .LBB12_1 ; X32-NEXT: jmp .LBB12_2 ; X32-NEXT: .LBB12_2: # %atomicrmw.end Index: llvm/test/CodeGen/X86/avx-load-store.ll =================================================================== --- llvm/test/CodeGen/X86/avx-load-store.ll +++ llvm/test/CodeGen/X86/avx-load-store.ll @@ -34,27 +34,27 @@ ; ; CHECK_O0-LABEL: test_256_load: ; CHECK_O0: # %bb.0: # %entry -; CHECK_O0-NEXT: subq $152, %rsp +; CHECK_O0-NEXT: subq $184, %rsp +; CHECK_O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK_O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK_O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK_O0-NEXT: vmovapd (%rdi), %ymm0 +; CHECK_O0-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill ; CHECK_O0-NEXT: vmovaps (%rsi), %ymm1 -; CHECK_O0-NEXT: vmovdqa (%rdx), %ymm2 -; CHECK_O0-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; CHECK_O0-NEXT: vmovups %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill +; CHECK_O0-NEXT: vmovdqa (%rdx), %ymm2 ; CHECK_O0-NEXT: vmovups %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; CHECK_O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK_O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK_O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK_O0-NEXT: callq dummy -; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload -; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload -; CHECK_O0-NEXT: vmovapd %ymm0, (%rax) -; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload +; CHECK_O0-NEXT: vmovups (%rsp), %ymm2 # 32-byte Reload +; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload ; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm1 # 32-byte Reload -; CHECK_O0-NEXT: vmovaps %ymm1, (%rcx) +; 
CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi # 8-byte Reload +; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload ; CHECK_O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload -; CHECK_O0-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload -; CHECK_O0-NEXT: vmovdqa %ymm2, (%rdx) -; CHECK_O0-NEXT: addq $152, %rsp +; CHECK_O0-NEXT: vmovapd %ymm2, (%rdi) +; CHECK_O0-NEXT: vmovaps %ymm1, (%rsi) +; CHECK_O0-NEXT: vmovdqa %ymm0, (%rdx) +; CHECK_O0-NEXT: addq $184, %rsp ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq entry: @@ -173,9 +173,10 @@ ; ; CHECK_O0-LABEL: double_save: ; CHECK_O0: # %bb.0: -; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: # implicit-def: $ymm0 +; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0 +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq @@ -195,9 +196,10 @@ ; ; CHECK_O0-LABEL: double_save_volatile: ; CHECK_O0: # %bb.0: -; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: # implicit-def: $ymm0 +; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0 +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq @@ -272,11 +274,11 @@ ; ; CHECK_O0-LABEL: add8i32: ; CHECK_O0: # %bb.0: -; CHECK_O0-NEXT: vmovdqu (%rsi), %xmm0 +; CHECK_O0-NEXT: vmovdqu (%rsi), %xmm2 ; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1 -; CHECK_O0-NEXT: # implicit-def: $ymm2 -; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: # implicit-def: $ymm0 +; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0 +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq @@ -317,11 
+319,11 @@ ; ; CHECK_O0-LABEL: add4i64a16: ; CHECK_O0: # %bb.0: -; CHECK_O0-NEXT: vmovdqa (%rsi), %xmm0 +; CHECK_O0-NEXT: vmovdqa (%rsi), %xmm2 ; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1 -; CHECK_O0-NEXT: # implicit-def: $ymm2 -; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: # implicit-def: $ymm0 +; CHECK_O0-NEXT: vmovaps %xmm2, %xmm0 +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq Index: llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll =================================================================== --- llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll +++ llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll @@ -19,44 +19,41 @@ ; CHECK: ## %bb.0: ; CHECK-NEXT: subq $56, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: vpmovw2m %xmm0, %k0 -; CHECK-NEXT: movl $2, %esi -; CHECK-NEXT: movl $8, %eax -; CHECK-NEXT: movq %rdx, %rdi -; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movl %eax, %edx ; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rdx, %rdi +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill +; CHECK-NEXT: vpmovw2m %xmm0, %k0 ; CHECK-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movl $2, %esi +; CHECK-NEXT: movl $8, %edx ; CHECK-NEXT: callq _calc_expected_mask_val +; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload ; CHECK-NEXT: ## kill: def $eax killed $eax killed $rax ; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax ; CHECK-NEXT: movzwl %ax, %esi -; CHECK-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k0 ## 2-byte Reload ; CHECK-NEXT: kmovb %k0, %edi -; CHECK-NEXT: movq 
{{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload ; CHECK-NEXT: callq _check_mask16 ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload -; CHECK-NEXT: vpmovd2m %xmm0, %k0 -; CHECK-NEXT: kmovq %k0, %k1 -; CHECK-NEXT: kmovd %k0, %ecx -; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload +; CHECK-NEXT: vpmovd2m %xmm0, %k0 +; CHECK-NEXT: ## kill: def $k1 killed $k0 +; CHECK-NEXT: kmovd %k0, %eax +; CHECK-NEXT: ## kill: def $al killed $al killed $eax +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: ## kill: def $ax killed $ax killed $eax +; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; CHECK-NEXT: movl $4, %edx ; CHECK-NEXT: movl %edx, %esi -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; CHECK-NEXT: callq _calc_expected_mask_val -; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax -; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx ## 2-byte Reload -; CHECK-NEXT: movzwl %cx, %edi -; CHECK-NEXT: movzwl %ax, %esi +; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %si ## 2-byte Reload ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload +; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax +; CHECK-NEXT: movzwl %si, %edi +; CHECK-NEXT: movzwl %ax, %esi ; CHECK-NEXT: callq _check_mask16 ; CHECK-NEXT: addq $56, %rsp ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/bug47278-eflags-error.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/bug47278-eflags-error.mir @@ -0,0 +1,78 @@ +# NOTE: Assertions have been autogenerated by 
utils/update_mir_test_checks.py +# RUN: llc -mtriple=i386-unknown-linux-musl -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s + +# Test for correct management of allocatable and non-allocatable +# live-ins in fastregalloc + +--- +name: live_through_ecx +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: live_through_ecx + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $ecx + ; CHECK: NOOP implicit $ecx + ; CHECK: bb.1: + ; CHECK: liveins: $ecx + ; CHECK: RET implicit killed $ecx + bb.0: + liveins: $ecx + NOOP implicit $ecx + + bb.1: + liveins: $ecx + + RET implicit $ecx + +... + +--- +name: live_out_ecx +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: live_out_ecx + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $eax, $ebx + ; CHECK: renamable $ecx = COPY killed $ebx + ; CHECK: bb.1: + ; CHECK: liveins: $ecx + ; CHECK: RET implicit killed $ecx + bb.0: + liveins: $eax, $ebx + %0:gr32 = COPY $eax + %1:gr32 = COPY $ebx + $ecx = COPY %1 + + bb.1: + liveins: $ecx + + RET implicit $ecx + +... + +--- +name: live_out_eflags +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: live_out_eflags + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x80000000) + ; CHECK: liveins: $eax, $ebx + ; CHECK: TEST32rr killed renamable $eax, killed renamable $ebx, implicit-def $eflags + ; CHECK: bb.1: + ; CHECK: liveins: $eflags + ; CHECK: RET implicit killed $eflags + bb.0: + liveins: $eax, $ebx + %0:gr32 = COPY $eax + %1:gr32 = COPY $ebx + TEST32rr %0, %1, implicit-def $eflags + + bb.1: + liveins: $eflags + + RET implicit $eflags + +... 
Index: llvm/test/CodeGen/X86/bug47278.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/X86/bug47278.mir @@ -0,0 +1,45 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=i386-unknown-linux-musl -verify-machineinstrs -run-pass=regallocfast -o - %s | FileCheck %s + +# Make sure this case doesn't assert or try to assign $ecx to %1 on +# SHRD32rrCL + +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + ; CHECK-LABEL: name: foo + ; CHECK: renamable $eax = IMPLICIT_DEF + ; CHECK: renamable $edx = MOVZX32rm8 renamable $eax, 1, $noreg, 0, $noreg :: (load 1 from `i168* undef` + 20, align 16) + ; CHECK: dead renamable $ecx = MOV32rm renamable $eax, 1, $noreg, 0, $noreg :: (load 4 from `i168* undef` + 12, align 16) + ; CHECK: renamable $al = MOV8rm killed renamable $eax, 1, $noreg, 0, $noreg :: (load 1 from `i32* undef`, align 4) + ; CHECK: dead renamable $ecx = COPY renamable $edx + ; CHECK: dead renamable $ecx = COPY renamable $edx + ; CHECK: dead renamable $ecx = COPY renamable $edx + ; CHECK: renamable $esi = IMPLICIT_DEF + ; CHECK: renamable $ecx = IMPLICIT_DEF + ; CHECK: renamable $ecx = CMOV32rr renamable $ecx, killed renamable $esi, 2, implicit undef $eflags + ; CHECK: renamable $cl = MOV8ri -128 + ; CHECK: $cl = IMPLICIT_DEF + ; CHECK: renamable $eax = COPY renamable $edx + ; CHECK: dead renamable $eax = SHRD32rrCL renamable $eax, killed renamable $edx, implicit-def dead $eflags, implicit killed $cl + ; CHECK: RETL + %0:gr32 = IMPLICIT_DEF + %1:gr32 = MOVZX32rm8 %0, 1, $noreg, 0, $noreg :: (load 1 from `i168* undef` + 20, align 16) + %2:gr32 = MOV32rm %0, 1, $noreg, 0, $noreg :: (load 4 from `i168* undef` + 12, align 16) + %3:gr8 = MOV8rm %0, 1, $noreg, 0, $noreg :: (load 1 from `i32* undef`, align 4) + %4:gr32 = COPY %1 + %5:gr32 = COPY %1 + %6:gr32 = COPY %1 + %7:gr32 = IMPLICIT_DEF + %8:gr32 = IMPLICIT_DEF + %8:gr32 = CMOV32rr %8, killed %7, 2, 
implicit undef $eflags + %9:gr8 = MOV8ri -128 + %9:gr8 = COPY %3 + $cl = IMPLICIT_DEF + %8:gr32 = COPY %1 + %8:gr32 = SHRD32rrCL %8, %1, implicit-def dead $eflags, implicit $cl + RETL + +... Index: llvm/test/CodeGen/X86/crash-O0.ll =================================================================== --- llvm/test/CodeGen/X86/crash-O0.ll +++ llvm/test/CodeGen/X86/crash-O0.ll @@ -16,14 +16,15 @@ ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax +; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill ; CHECK-NEXT: ## implicit-def: $rcx ; CHECK-NEXT: ## kill: def $cl killed $cl killed $rcx -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill ; CHECK-NEXT: movzbw %al, %ax ; CHECK-NEXT: divb %cl -; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %dl ## 1-byte Reload -; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill -; CHECK-NEXT: movzbw %dl, %ax +; CHECK-NEXT: movb %al, %dl +; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al ## 1-byte Reload +; CHECK-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) ## 1-byte Spill +; CHECK-NEXT: movzbw %al, %ax ; CHECK-NEXT: divb %cl ; CHECK-NEXT: shrw $8, %ax ; CHECK-NEXT: ## kill: def $al killed $al killed $ax @@ -31,11 +32,11 @@ ; CHECK-NEXT: jae LBB0_2 ; CHECK-NEXT: ## %bb.1: ## %"39" ; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al ## 1-byte Reload -; CHECK-NEXT: movzbl %al, %ecx -; CHECK-NEXT: ## implicit-def: $edx -; CHECK-NEXT: imull %edx, %ecx -; CHECK-NEXT: addl %edx, %ecx -; CHECK-NEXT: cmpl %edx, %ecx +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: ## implicit-def: $ecx +; CHECK-NEXT: imull %ecx, %eax +; CHECK-NEXT: addl %ecx, %eax +; CHECK-NEXT: cmpl %ecx, %eax ; CHECK-NEXT: je LBB0_3 ; CHECK-NEXT: LBB0_2: ## %"40" ; CHECK-NEXT: ud2 @@ -79,12 +80,11 @@ ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: ## kill: def $rax killed $eax -; CHECK-NEXT: movq %rax, 
{{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movq %rcx, %rax ; CHECK-NEXT: cqto -; CHECK-NEXT: movslq %edi, %rcx -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload -; CHECK-NEXT: idivq (%rsi,%rcx,8) +; CHECK-NEXT: movslq %edi, %rsi +; CHECK-NEXT: idivq (%rcx,%rsi,8) ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq %gep = getelementptr i64, i64* null, i32 %V Index: llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll =================================================================== --- llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll +++ llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll @@ -8,7 +8,8 @@ %tmp = load i32, i32* %p, align 4, !dbg !7 ; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load 4 from %ir.p) ; CHECK-NEXT: $rax = KILL killed renamable $eax, debug-location !7 - ; CHECK-NEXT: $rcx = MOV64rr $rax, debug-location !7 + ; CHECK-NEXT: MOV64mr $rsp, 1, $noreg, -8, $noreg, $rax :: (store 8 into %stack.0) + ; CHECK-NEXT: SUB64ri8 renamable $rax, 3, implicit-def $eflags, debug-location !7 switch i32 %tmp, label %bb7 [ i32 0, label %bb1 Index: llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll =================================================================== --- llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll +++ llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll @@ -19,7 +19,7 @@ ; different basic block, so its operands aren't necessarily exported ; for cross-block usage. 
-; CHECK: movb %al, [[OFS:[0-9]*]](%rsp) +; CHECK: movb %cl, [[OFS:[0-9]*]](%rsp) ; CHECK: callq {{_?}}bar ; CHECK: movb [[OFS]](%rsp), %al Index: llvm/test/CodeGen/X86/fast-isel-nontemporal.ll =================================================================== --- llvm/test/CodeGen/X86/fast-isel-nontemporal.ll +++ llvm/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -586,11 +586,11 @@ ; ; AVX1-LABEL: test_load_nt8xfloat: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xfloat: @@ -628,11 +628,11 @@ ; ; AVX1-LABEL: test_load_nt4xdouble: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt4xdouble: @@ -670,11 +670,11 @@ ; ; AVX1-LABEL: test_load_nt32xi8: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt32xi8: @@ -712,11 +712,11 @@ ; 
; AVX1-LABEL: test_load_nt16xi16: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xi16: @@ -754,11 +754,11 @@ ; ; AVX1-LABEL: test_load_nt8xi32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xi32: @@ -796,11 +796,11 @@ ; ; AVX1-LABEL: test_load_nt4xi64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt4xi64: @@ -889,6 +889,7 @@ ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; + ; AVX512-LABEL: test_nt64xi8: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vmovntdq %zmm0, (%rdi) @@ -915,6 +916,7 @@ ; AVX-NEXT: vzeroupper ; AVX-NEXT: retq ; + ; AVX512-LABEL: test_nt32xi16: ; AVX512: # %bb.0: # %entry ; AVX512-NEXT: vmovntdq %zmm0, (%rdi) @@ -1008,16 +1010,16 @@ ; ; AVX1-LABEL: test_load_nt16xfloat: ; AVX1: # %bb.0: # %entry -; 
AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xfloat: @@ -1062,16 +1064,16 @@ ; ; AVX1-LABEL: test_load_nt8xdouble: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xdouble: @@ -1116,16 +1118,16 @@ ; ; AVX1-LABEL: test_load_nt64xi8: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; 
AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt64xi8: @@ -1170,16 +1172,16 @@ ; ; AVX1-LABEL: test_load_nt32xi16: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt32xi16: @@ -1224,16 +1226,16 @@ ; ; AVX1-LABEL: test_load_nt16xi32: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), 
%xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xi32: @@ -1278,16 +1280,16 @@ ; ; AVX1-LABEL: test_load_nt8xi64: ; AVX1: # %bb.0: # %entry -; AVX1-NEXT: vmovntdqa (%rdi), %xmm0 +; AVX1-NEXT: vmovntdqa (%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm0 +; AVX1-NEXT: vmovaps %xmm1, %xmm0 +; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 ; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm0, %xmm1 -; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 -; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 -; AVX1-NEXT: # implicit-def: $ymm2 -; AVX1-NEXT: vmovaps %xmm1, %xmm2 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 -; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 +; AVX1-NEXT: vmovaps %xmm2, %xmm1 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 +; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xi64: Index: llvm/test/CodeGen/X86/fast-isel-select-sse.ll =================================================================== --- llvm/test/CodeGen/X86/fast-isel-select-sse.ll +++ llvm/test/CodeGen/X86/fast-isel-select-sse.ll @@ -65,12 +65,15 @@ define float @select_fcmp_ogt_f32(float %a, float %b, float %c, float %d) { ; SSE-LABEL: select_fcmp_ogt_f32: ; SSE: # %bb.0: -; SSE-NEXT: cmpltss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andps %xmm2, %xmm0 -; SSE-NEXT: andnps %xmm3, %xmm1 -; SSE-NEXT: orps %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 
+; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: cmpltss %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andps %xmm2, %xmm1 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ogt_f32: @@ -93,12 +96,15 @@ define double @select_fcmp_ogt_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ogt_f64: ; SSE: # %bb.0: -; SSE-NEXT: cmpltsd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm2, %xmm0 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero +; SSE-NEXT: cmpltsd %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andpd %xmm2, %xmm1 +; SSE-NEXT: andnpd %xmm3, %xmm0 +; SSE-NEXT: orpd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ogt_f64: @@ -121,12 +127,15 @@ define float @select_fcmp_oge_f32(float %a, float %b, float %c, float %d) { ; SSE-LABEL: select_fcmp_oge_f32: ; SSE: # %bb.0: -; SSE-NEXT: cmpless %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andps %xmm2, %xmm0 -; SSE-NEXT: andnps %xmm3, %xmm1 -; SSE-NEXT: orps %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: cmpless %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andps %xmm2, %xmm1 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_oge_f32: @@ -149,12 +158,15 @@ define double @select_fcmp_oge_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_oge_f64: ; 
SSE: # %bb.0: -; SSE-NEXT: cmplesd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm2, %xmm0 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero +; SSE-NEXT: cmplesd %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andpd %xmm2, %xmm1 +; SSE-NEXT: andnpd %xmm3, %xmm0 +; SSE-NEXT: orpd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_oge_f64: @@ -501,12 +513,15 @@ define float @select_fcmp_ult_f32(float %a, float %b, float %c, float %d) { ; SSE-LABEL: select_fcmp_ult_f32: ; SSE: # %bb.0: -; SSE-NEXT: cmpnless %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andps %xmm2, %xmm0 -; SSE-NEXT: andnps %xmm3, %xmm1 -; SSE-NEXT: orps %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: cmpnless %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andps %xmm2, %xmm1 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ult_f32: @@ -529,12 +544,15 @@ define double @select_fcmp_ult_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ult_f64: ; SSE: # %bb.0: -; SSE-NEXT: cmpnlesd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm2, %xmm0 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero +; SSE-NEXT: cmpnlesd %xmm1, %xmm0 +; SSE-NEXT: 
movaps %xmm0, %xmm1 +; SSE-NEXT: andpd %xmm2, %xmm1 +; SSE-NEXT: andnpd %xmm3, %xmm0 +; SSE-NEXT: orpd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ult_f64: @@ -557,12 +575,15 @@ define float @select_fcmp_ule_f32(float %a, float %b, float %c, float %d) { ; SSE-LABEL: select_fcmp_ule_f32: ; SSE: # %bb.0: -; SSE-NEXT: cmpnltss %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andps %xmm2, %xmm0 -; SSE-NEXT: andnps %xmm3, %xmm1 -; SSE-NEXT: orps %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movss %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; SSE-NEXT: cmpnltss %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andps %xmm2, %xmm1 +; SSE-NEXT: andnps %xmm3, %xmm0 +; SSE-NEXT: orps %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ule_f32: @@ -585,12 +606,15 @@ define double @select_fcmp_ule_f64(double %a, double %b, double %c, double %d) { ; SSE-LABEL: select_fcmp_ule_f64: ; SSE: # %bb.0: -; SSE-NEXT: cmpnltsd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: andpd %xmm2, %xmm0 -; SSE-NEXT: andnpd %xmm3, %xmm1 -; SSE-NEXT: orpd %xmm0, %xmm1 -; SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload +; SSE-NEXT: # xmm0 = mem[0],zero +; SSE-NEXT: cmpnltsd %xmm1, %xmm0 +; SSE-NEXT: movaps %xmm0, %xmm1 +; SSE-NEXT: andpd %xmm2, %xmm1 +; SSE-NEXT: andnpd %xmm3, %xmm0 +; SSE-NEXT: orpd %xmm1, %xmm0 ; SSE-NEXT: retq ; ; AVX-LABEL: select_fcmp_ule_f64: Index: llvm/test/CodeGen/X86/fast-isel-select.ll =================================================================== --- llvm/test/CodeGen/X86/fast-isel-select.ll +++ llvm/test/CodeGen/X86/fast-isel-select.ll @@ -9,11 +9,11 @@ define i32 @fastisel_select(i1 %exchSub2211_, i1 %trunc_8766) { 
; CHECK-LABEL: fastisel_select: ; CHECK: ## %bb.0: -; CHECK-NEXT: ## kill: def $sil killed $sil killed $esi -; CHECK-NEXT: ## kill: def $dil killed $dil killed $edi +; CHECK-NEXT: movb %sil, %dl +; CHECK-NEXT: movb %dil, %cl ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: subb %sil, %dil -; CHECK-NEXT: testb $1, %dil +; CHECK-NEXT: subb %dl, %cl +; CHECK-NEXT: testb $1, %cl ; CHECK-NEXT: movl $1204476887, %ecx ## imm = 0x47CADBD7 ; CHECK-NEXT: cmovnel %ecx, %eax ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/fast-isel-x86-64.ll =================================================================== --- llvm/test/CodeGen/X86/fast-isel-x86-64.ll +++ llvm/test/CodeGen/X86/fast-isel-x86-64.ll @@ -299,8 +299,8 @@ ; CHECK-LABEL: test23: ; CHECK: movq %rdi, [[STACK:[0-9]+\(%rsp\)]] ; CHECK: call -; CHECK: movq [[STACK]], %rcx -; CHECK: movq %rcx, %rax +; CHECK-NEXT: movq [[STACK]], %rax +; CHECK-NEXT: addq $24, %rsp ; CHECK: ret } Index: llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll =================================================================== --- llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll +++ llvm/test/CodeGen/X86/mixed-ptr-sizes-i686.ll @@ -61,12 +61,12 @@ ; ; CHECK-O0-LABEL: test_sign_ext: ; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl %eax, %edx -; CHECK-O0-NEXT: sarl $31, %edx -; CHECK-O0-NEXT: movl %eax, 8(%ecx) -; CHECK-O0-NEXT: movl %edx, 12(%ecx) +; CHECK-O0-NEXT: movl %edx, %ecx +; CHECK-O0-NEXT: sarl $31, %ecx +; CHECK-O0-NEXT: movl %edx, 8(%eax) +; CHECK-O0-NEXT: movl %ecx, 12(%eax) ; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %0 = addrspacecast i32* %i to i32 addrspace(272)* @@ -77,13 +77,21 @@ } define dso_local void @test_zero_ext(%struct.Foo* %f, i32 addrspace(271)* %i) { -; ALL-LABEL: test_zero_ext: -; ALL: # %bb.0: # %entry -; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax -; ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx -; 
ALL-NEXT: movl %eax, 8(%ecx) -; ALL-NEXT: movl $0, 12(%ecx) -; ALL-NEXT: jmp _use_foo # TAILCALL +; CHECK-LABEL: test_zero_ext: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %eax, 8(%ecx) +; CHECK-NEXT: movl $0, 12(%ecx) +; CHECK-NEXT: jmp _use_foo # TAILCALL +; +; CHECK-O0-LABEL: test_zero_ext: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, 8(%eax) +; CHECK-O0-NEXT: movl $0, 12(%eax) +; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(271)* %i to i32 addrspace(272)* %p64 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 1 @@ -102,13 +110,10 @@ ; ; CHECK-O0-LABEL: test_trunc: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: pushl %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-O0-NEXT: movl %ecx, (%edx) -; CHECK-O0-NEXT: movl %eax, (%esp) # 4-byte Spill -; CHECK-O0-NEXT: popl %eax +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, (%eax) ; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(272)* %i to i32* @@ -119,12 +124,19 @@ } define dso_local void @test_noop1(%struct.Foo* %f, i32* %i) { -; ALL-LABEL: test_noop1: -; ALL: # %bb.0: # %entry -; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax -; ALL-NEXT: movl {{[0-9]+}}(%esp), %ecx -; ALL-NEXT: movl %eax, (%ecx) -; ALL-NEXT: jmp _use_foo # TAILCALL +; CHECK-LABEL: test_noop1: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movl %eax, (%ecx) +; CHECK-NEXT: jmp _use_foo # TAILCALL +; +; CHECK-O0-LABEL: test_noop1: +; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, 
(%eax) +; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %p32 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 0 store i32* %i, i32** %p32, align 8 @@ -144,11 +156,11 @@ ; ; CHECK-O0-LABEL: test_noop2: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-O0-NEXT: movl %ecx, 8(%edx) -; CHECK-O0-NEXT: movl %eax, 12(%edx) +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %edx, 8(%eax) +; CHECK-O0-NEXT: movl %ecx, 12(%eax) ; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %p64 = getelementptr inbounds %struct.Foo, %struct.Foo* %f, i32 0, i32 1 @@ -171,11 +183,11 @@ ; CHECK-O0-LABEL: test_null_arg: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: subl $12, %esp -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl %esp, %ecx -; CHECK-O0-NEXT: movl %eax, (%ecx) -; CHECK-O0-NEXT: movl $0, 8(%ecx) -; CHECK-O0-NEXT: movl $0, 4(%ecx) +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx +; CHECK-O0-NEXT: movl %esp, %eax +; CHECK-O0-NEXT: movl %ecx, (%eax) +; CHECK-O0-NEXT: movl $0, 8(%eax) +; CHECK-O0-NEXT: movl $0, 4(%eax) ; CHECK-O0-NEXT: calll _test_noop2 ; CHECK-O0-NEXT: addl $12, %esp ; CHECK-O0-NEXT: retl @@ -196,12 +208,12 @@ ; ; CHECK-O0-LABEL: test_unrecognized: ; CHECK-O0: # %bb.0: # %entry +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl %eax, %edx -; CHECK-O0-NEXT: sarl $31, %edx -; CHECK-O0-NEXT: movl %eax, 8(%ecx) -; CHECK-O0-NEXT: movl %edx, 12(%ecx) +; CHECK-O0-NEXT: movl %edx, %ecx +; CHECK-O0-NEXT: sarl $31, %ecx +; CHECK-O0-NEXT: movl %edx, 8(%eax) +; CHECK-O0-NEXT: movl %ecx, 12(%eax) ; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(14)* %i to i32 addrspace(272)* @@ -221,13 +233,10 @@ ; ; CHECK-O0-LABEL: test_unrecognized2: ; CHECK-O0: # %bb.0: # %entry -; 
CHECK-O0-NEXT: pushl %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-O0-NEXT: movl %ecx, 16(%edx) -; CHECK-O0-NEXT: movl %eax, (%esp) # 4-byte Spill -; CHECK-O0-NEXT: popl %eax +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, 16(%eax) ; CHECK-O0-NEXT: jmp _use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(272)* %i to i32 addrspace(9)* @@ -238,32 +247,22 @@ } define i32 @test_load_sptr32(i32 addrspace(270)* %i) { -; CHECK-LABEL: test_load_sptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl (%eax), %eax -; CHECK-NEXT: retl -; CHECK-O0-LABEL: test_load_sptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl (%eax), %eax -; CHECK-O0-NEXT: retl +; ALL-LABEL: test_load_sptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax +; ALL-NEXT: movl (%eax), %eax +; ALL-NEXT: retl entry: %0 = load i32, i32 addrspace(270)* %i, align 4 ret i32 %0 } define i32 @test_load_uptr32(i32 addrspace(271)* %i) { -; CHECK-LABEL: test_load_uptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl (%eax), %eax -; CHECK-NEXT: retl -; CHECK-O0-LABEL: test_load_uptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl (%eax), %eax -; CHECK-O0-NEXT: retl +; ALL-LABEL: test_load_uptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl {{[0-9]+}}(%esp), %eax +; ALL-NEXT: movl (%eax), %eax +; ALL-NEXT: retl entry: %0 = load i32, i32 addrspace(271)* %i, align 4 ret i32 %0 @@ -275,15 +274,12 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl (%eax), %eax ; CHECK-NEXT: retl +; ; CHECK-O0-LABEL: test_load_ptr64: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: pushl %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), 
%ecx -; CHECK-O0-NEXT: movl (%ecx), %ecx -; CHECK-O0-NEXT: movl %eax, (%esp) -; CHECK-O0-NEXT: movl %ecx, %eax -; CHECK-O0-NEXT: popl %ecx +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl (%eax), %eax ; CHECK-O0-NEXT: retl entry: %0 = load i32, i32 addrspace(272)* %i, align 8 @@ -297,11 +293,12 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl %eax, (%ecx) ; CHECK-NEXT: retl +; ; CHECK-O0-LABEL: test_store_sptr32: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl %eax, (%ecx) +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, (%eax) ; CHECK-O0-NEXT: retl entry: store i32 %i, i32 addrspace(270)* %s, align 4 @@ -315,11 +312,12 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl %eax, (%ecx) ; CHECK-NEXT: retl +; ; CHECK-O0-LABEL: test_store_uptr32: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl %eax, (%ecx) +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl %ecx, (%eax) ; CHECK-O0-NEXT: retl entry: store i32 %i, i32 addrspace(271)* %s, align 4 @@ -333,12 +331,13 @@ ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ; CHECK-NEXT: movl %eax, (%ecx) ; CHECK-NEXT: retl +; ; CHECK-O0-LABEL: test_store_ptr64: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-O0-NEXT: movl {{[0-9]+}}(%esp), %edx -; CHECK-O0-NEXT: movl %edx, (%ecx) +; CHECK-O0-NEXT: movl %ecx, (%eax) ; CHECK-O0-NEXT: retl entry: store i32 %i, i32 addrspace(272)* %s, align 8 Index: llvm/test/CodeGen/X86/mixed-ptr-sizes.ll =================================================================== --- llvm/test/CodeGen/X86/mixed-ptr-sizes.ll +++ llvm/test/CodeGen/X86/mixed-ptr-sizes.ll @@ -88,8 +88,8 @@ ; ; CHECK-O0-LABEL: 
test_trunc: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $edx killed $edx killed $rdx -; CHECK-O0-NEXT: movl %edx, (%rcx) +; CHECK-O0-NEXT: movl %edx, %eax +; CHECK-O0-NEXT: movl %eax, (%rcx) ; CHECK-O0-NEXT: jmp use_foo # TAILCALL entry: %0 = addrspacecast i32* %i to i32 addrspace(270)* @@ -125,23 +125,19 @@ ; Test that null can be passed as a 32-bit pointer. define dso_local void @test_null_arg(%struct.Foo* %f) { -; CHECK-LABEL: test_null_arg: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $40, %rsp -; CHECK: xorl %edx, %edx -; CHECK-NEXT: callq test_noop1 -; CHECK-NEXT: nop -; CHECK-NEXT: addq $40, %rsp -; CHECK-NEXT: retq -; -; CHECK-O0-LABEL: test_null_arg: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: subq $40, %rsp -; CHECK-O0: xorl %edx, %edx -; CHECK-O0-NEXT: callq test_noop1 -; CHECK-O0-NEXT: nop -; CHECK-O0-NEXT: addq $40, %rsp -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_null_arg: +; ALL: # %bb.0: # %entry +; ALL-NEXT: subq $40, %rsp +; ALL-NEXT: .seh_stackalloc 40 +; ALL-NEXT: .seh_endprologue +; ALL-NEXT: xorl %edx, %edx +; ALL-NEXT: callq test_noop1 +; ALL-NEXT: nop +; ALL-NEXT: addq $40, %rsp +; ALL-NEXT: retq +; ALL-NEXT: .seh_handlerdata +; ALL-NEXT: .text +; ALL-NEXT: .seh_endproc entry: call void @test_noop1(%struct.Foo* %f, i32 addrspace(270)* null) ret void @@ -156,8 +152,8 @@ ; ; CHECK-O0-LABEL: test_unrecognized: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: # kill: def $edx killed $edx killed $rdx -; CHECK-O0-NEXT: movl %edx, (%rcx) +; CHECK-O0-NEXT: movl %edx, %eax +; CHECK-O0-NEXT: movl %eax, (%rcx) ; CHECK-O0-NEXT: jmp use_foo # TAILCALL entry: %0 = addrspacecast i32 addrspace(14)* %i to i32 addrspace(270)* @@ -189,16 +185,11 @@ } define i32 @test_load_sptr32(i32 addrspace(270)* %i) { -; CHECK-LABEL: test_load_sptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movslq %ecx, %rax -; CHECK-NEXT: movl (%rax), %eax -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_load_sptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: 
movslq %ecx, %rax -; CHECK-O0-NEXT: movl (%rax), %eax -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_load_sptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movslq %ecx, %rax +; ALL-NEXT: movl (%rax), %eax +; ALL-NEXT: retq entry: %0 = load i32, i32 addrspace(270)* %i, align 4 ret i32 %0 @@ -210,6 +201,7 @@ ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl (%rax), %eax ; CHECK-NEXT: retq +; ; CHECK-O0-LABEL: test_load_uptr32: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %ecx, %eax @@ -222,30 +214,21 @@ } define i32 @test_load_ptr64(i32 addrspace(272)* %i) { -; CHECK-LABEL: test_load_ptr64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl (%rcx), %eax -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_load_ptr64: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl (%rcx), %eax -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_load_ptr64: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl (%rcx), %eax +; ALL-NEXT: retq entry: %0 = load i32, i32 addrspace(272)* %i, align 8 ret i32 %0 } define void @test_store_sptr32(i32 addrspace(270)* %s, i32 %i) { -; CHECK-LABEL: test_store_sptr32: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movslq %ecx, %rax -; CHECK-NEXT: movl %edx, (%rax) -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_store_sptr32: -; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movslq %ecx, %rax -; CHECK-O0-NEXT: movl %edx, (%rax) -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_store_sptr32: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movslq %ecx, %rax +; ALL-NEXT: movl %edx, (%rax) +; ALL-NEXT: retq entry: store i32 %i, i32 addrspace(270)* %s, align 4 ret void @@ -257,6 +240,7 @@ ; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: movl %edx, (%rax) ; CHECK-NEXT: retq +; ; CHECK-O0-LABEL: test_store_uptr32: ; CHECK-O0: # %bb.0: # %entry ; CHECK-O0-NEXT: movl %ecx, %eax @@ -269,14 +253,10 @@ } define void @test_store_ptr64(i32 addrspace(272)* %s, i32 %i) { -; CHECK-LABEL: test_store_ptr64: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl %edx, (%rcx) -; CHECK-NEXT: retq -; CHECK-O0-LABEL: test_store_ptr64: -; 
CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: movl %edx, (%rcx) -; CHECK-O0-NEXT: retq +; ALL-LABEL: test_store_ptr64: +; ALL: # %bb.0: # %entry +; ALL-NEXT: movl %edx, (%rcx) +; ALL-NEXT: retq entry: store i32 %i, i32 addrspace(272)* %s, align 8 ret void Index: llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll =================================================================== --- llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll +++ llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll @@ -1,7 +1,6 @@ ; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -optimize-regalloc=0 -no-x86-call-frame-opt | FileCheck %s ; RUN: llc -O0 < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=generic -regalloc=fast -no-x86-call-frame-opt | FileCheck %s -; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 -no-x86-call-frame-opt | FileCheck -check-prefix=ATOM %s -; CHECKed instructions should be the same with or without -O0 except on Intel Atom due to instruction scheduling. 
+; RUN: llc < %s -stack-symbol-ordering=0 -mtriple=i386-apple-darwin9 -mcpu=atom -regalloc=fast -optimize-regalloc=0 -no-x86-call-frame-opt | FileCheck %s @.str = private constant [12 x i8] c"x + y = %i\0A\00", align 1 ; <[12 x i8]*> [#uses=1] @@ -17,18 +16,6 @@ ; CHECK-NOT: movl ; CHECK: addl %ebx, %eax -; On Intel Atom the scheduler moves a movl instruction -; used for the printf call to follow movl 24(%esp), %eax -; ATOM: movl 24(%esp), %eax -; ATOM-NOT: movl -; ATOM: movl %eax, 36(%esp) -; ATOM: movl -; ATOM: movl 28(%esp), %ebx -; ATOM-NOT: movl -; ATOM: movl %ebx, 40(%esp) -; ATOM-NOT: movl -; ATOM: addl %ebx, %eax - %retval = alloca i32 ; [#uses=2] %"%ebx" = alloca i32 ; [#uses=1] %"%eax" = alloca i32 ; [#uses=2] Index: llvm/test/CodeGen/X86/pr11415.ll =================================================================== --- llvm/test/CodeGen/X86/pr11415.ll +++ llvm/test/CodeGen/X86/pr11415.ll @@ -6,12 +6,11 @@ ; CHECK: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: movq %rax, -8(%rsp) -; CHECK-NEXT: movq -8(%rsp), %rdx +; CHECK-NEXT: movq %rcx, %rdx ; CHECK-NEXT: #APP ; CHECK-NEXT: #NO_APP -; CHECK-NEXT: movq %rdx, %rax +; CHECK-NEXT: movq %rcx, -8(%rsp) +; CHECK-NEXT: movq -8(%rsp), %rax ; CHECK-NEXT: ret define i64 @foo() { Index: llvm/test/CodeGen/X86/pr1489.ll =================================================================== --- llvm/test/CodeGen/X86/pr1489.ll +++ llvm/test/CodeGen/X86/pr1489.ll @@ -110,28 +110,25 @@ ; CHECK-NEXT: movl %esp, %ebp ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $48, %esp +; CHECK-NEXT: subl $32, %esp ; CHECK-NEXT: calll _baz -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %eax, %edi ; CHECK-NEXT: calll _bar -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %eax, %esi ; CHECK-NEXT: calll _foo ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: calll _quux -; CHECK-NEXT: 
movl %esp, %ecx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload -; CHECK-NEXT: movl %edx, 16(%ecx) -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload -; CHECK-NEXT: movl %esi, 12(%ecx) -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; CHECK-NEXT: movl %edi, 8(%ecx) -; CHECK-NEXT: movl %eax, 4(%ecx) -; CHECK-NEXT: movl $_.str, (%ecx) +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl %edi, 16(%eax) +; CHECK-NEXT: movl %esi, 12(%eax) +; CHECK-NEXT: movl %edx, 8(%eax) +; CHECK-NEXT: movl %ecx, 4(%eax) +; CHECK-NEXT: movl $_.str, (%eax) ; CHECK-NEXT: calll _printf -; CHECK-NEXT: ## implicit-def: $ecx -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: addl $48, %esp +; CHECK-NEXT: ## implicit-def: $eax +; CHECK-NEXT: addl $32, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi ; CHECK-NEXT: popl %ebp Index: llvm/test/CodeGen/X86/pr27591.ll =================================================================== --- llvm/test/CodeGen/X86/pr27591.ll +++ llvm/test/CodeGen/X86/pr27591.ll @@ -9,9 +9,8 @@ ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: cmpl $0, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movzbl %al, %edi +; CHECK-NEXT: andl $1, %edi ; CHECK-NEXT: callq callee1 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq @@ -27,10 +26,9 @@ ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: cmpl $0, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: andl $1, %eax -; CHECK-NEXT: negl %eax -; CHECK-NEXT: movl %eax, %edi +; CHECK-NEXT: movzbl %al, %edi +; CHECK-NEXT: andl $1, %edi +; CHECK-NEXT: negl %edi ; CHECK-NEXT: callq callee2 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/pr30430.ll =================================================================== --- llvm/test/CodeGen/X86/pr30430.ll +++ 
llvm/test/CodeGen/X86/pr30430.ll @@ -12,13 +12,13 @@ ; CHECK-NEXT: andq $-64, %rsp ; CHECK-NEXT: subq $256, %rsp # imm = 0x100 ; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp) @@ -27,75 +27,75 @@ ; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm15 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm14 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm13 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm12 = mem[0],zero,zero,zero +; 
CHECK-NEXT: vmovss {{.*#+}} xmm11 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm10 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm9 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm8 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm7 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm16 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm17 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm18 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm19 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm20 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm21 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm22 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm23 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm4, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm16, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm17, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm18, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm19, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm20, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm21, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm22, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovss %xmm23, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss {{.*#+}} xmm6 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm5 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm4 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss %xmm15, {{[0-9]+}}(%rsp) +; 
CHECK-NEXT: vmovss %xmm14, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm13, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm12, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm11, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm10, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm9, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm8, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm7, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm6, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm5, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm4, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm3, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm2, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm1, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovss %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero ; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] -; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm0[0,1,2],xmm1[0] ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] -; CHECK-NEXT: # implicit-def: $ymm2 -; CHECK-NEXT: vmovaps %xmm1, %xmm2 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, 
%ymm0 -; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[0] +; CHECK-NEXT: # implicit-def: $ymm0 +; CHECK-NEXT: vmovaps %xmm2, %xmm0 +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] -; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[2,3] ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3] ; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm0[0,1,2],xmm2[0] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[2,3] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] +; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] -; CHECK-NEXT: # implicit-def: $ymm3 -; CHECK-NEXT: vmovaps %xmm2, %xmm3 -; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1 -; CHECK-NEXT: # implicit-def: $zmm2 -; CHECK-NEXT: vmovaps %ymm1, %ymm2 -; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0 +; CHECK-NEXT: vinsertps {{.*#+}} xmm3 = xmm0[0,1,2],xmm3[0] +; CHECK-NEXT: # implicit-def: $ymm0 +; CHECK-NEXT: vmovaps %xmm3, %xmm0 +; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm2 +; CHECK-NEXT: # implicit-def: $zmm0 +; CHECK-NEXT: vmovaps 
%ymm2, %ymm0 +; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 ; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0 ; CHECK-NEXT: movq %rbp, %rsp Index: llvm/test/CodeGen/X86/pr30813.ll =================================================================== --- llvm/test/CodeGen/X86/pr30813.ll +++ llvm/test/CodeGen/X86/pr30813.ll @@ -1,8 +1,9 @@ ; RUN: llc -mtriple=x86_64-linux-gnu -O0 %s -o - | FileCheck %s ; CHECK: patatino: ; CHECK: .cfi_startproc -; CHECK: movzwl (%rax), %e[[REG0:[abcd]x]] -; CHECK: movq %r[[REG0]], ({{%r[abcd]x}}) +; CHECK: movzwl (%rax), [[REG0:%e[abcd]x]] +; CHECK: movl [[REG0]], %e[[REG1C:[abcd]]]x +; CHECK: movq %r[[REG1C]]x, ({{%r[abcd]x}}) ; CHECK: retq define void @patatino() { Index: llvm/test/CodeGen/X86/pr32241.ll =================================================================== --- llvm/test/CodeGen/X86/pr32241.ll +++ llvm/test/CodeGen/X86/pr32241.ll @@ -10,10 +10,10 @@ ; CHECK-NEXT: movw $-15498, {{[0-9]+}}(%esp) # imm = 0xC376 ; CHECK-NEXT: movw $19417, {{[0-9]+}}(%esp) # imm = 0x4BD9 ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: cmpw $0, {{[0-9]+}}(%esp) -; CHECK-NEXT: movb $1, %cl ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill +; CHECK-NEXT: cmpw $0, {{[0-9]+}}(%esp) +; CHECK-NEXT: movb $1, %al +; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: jne .LBB0_2 ; CHECK-NEXT: # %bb.1: # %lor.rhs ; CHECK-NEXT: xorl %eax, %eax @@ -21,11 +21,11 @@ ; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: jmp .LBB0_2 ; CHECK-NEXT: .LBB0_2: # %lor.end -; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload -; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: cmpl %eax, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: movb 
{{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; CHECK-NEXT: andb $1, %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: cmpl %ecx, %eax ; CHECK-NEXT: setl %al ; CHECK-NEXT: andb $1, %al ; CHECK-NEXT: movzbl %al, %eax Index: llvm/test/CodeGen/X86/pr32284.ll =================================================================== --- llvm/test/CodeGen/X86/pr32284.ll +++ llvm/test/CodeGen/X86/pr32284.ll @@ -178,17 +178,8 @@ ; ; 686-O0-LABEL: f1: ; 686-O0: # %bb.0: # %entry -; 686-O0-NEXT: pushl %ebx -; 686-O0-NEXT: .cfi_def_cfa_offset 8 -; 686-O0-NEXT: pushl %edi -; 686-O0-NEXT: .cfi_def_cfa_offset 12 -; 686-O0-NEXT: pushl %esi -; 686-O0-NEXT: .cfi_def_cfa_offset 16 ; 686-O0-NEXT: subl $1, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 17 -; 686-O0-NEXT: .cfi_offset %esi, -16 -; 686-O0-NEXT: .cfi_offset %edi, -12 -; 686-O0-NEXT: .cfi_offset %ebx, -8 +; 686-O0-NEXT: .cfi_def_cfa_offset 5 ; 686-O0-NEXT: movl var_5, %eax ; 686-O0-NEXT: movl %eax, %ecx ; 686-O0-NEXT: sarl $31, %ecx @@ -197,33 +188,27 @@ ; 686-O0-NEXT: orl %ecx, %eax ; 686-O0-NEXT: setne (%esp) ; 686-O0-NEXT: movl var_5, %ecx +; 686-O0-NEXT: movl %ecx, %eax +; 686-O0-NEXT: sarl $31, %eax ; 686-O0-NEXT: movl %ecx, %edx -; 686-O0-NEXT: sarl $31, %edx -; 686-O0-NEXT: movl %ecx, %esi -; 686-O0-NEXT: subl $-1, %esi -; 686-O0-NEXT: sete %bl -; 686-O0-NEXT: movzbl %bl, %edi +; 686-O0-NEXT: subl $-1, %edx +; 686-O0-NEXT: sete %dl +; 686-O0-NEXT: movzbl %dl, %edx ; 686-O0-NEXT: addl $7093, %ecx # imm = 0x1BB5 -; 686-O0-NEXT: adcl $0, %edx -; 686-O0-NEXT: subl %edi, %ecx -; 686-O0-NEXT: sbbl $0, %edx -; 686-O0-NEXT: setl %bl -; 686-O0-NEXT: movzbl %bl, %edi -; 686-O0-NEXT: movl %edi, var_57 +; 686-O0-NEXT: adcl $0, %eax +; 686-O0-NEXT: subl %edx, %ecx +; 686-O0-NEXT: sbbl $0, %eax +; 686-O0-NEXT: setl %al +; 686-O0-NEXT: movzbl %al, %eax +; 686-O0-NEXT: movl %eax, var_57 ; 686-O0-NEXT: movl $0, var_57+4 -; 686-O0-NEXT: movl var_5, %edi -; 686-O0-NEXT: subl $-1, %edi -; 686-O0-NEXT: sete %bl -; 686-O0-NEXT: movzbl %bl, %ebx -; 
686-O0-NEXT: movl %ebx, _ZN8struct_210member_2_0E +; 686-O0-NEXT: movl var_5, %eax +; 686-O0-NEXT: subl $-1, %eax +; 686-O0-NEXT: sete %al +; 686-O0-NEXT: movzbl %al, %eax +; 686-O0-NEXT: movl %eax, _ZN8struct_210member_2_0E ; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4 ; 686-O0-NEXT: addl $1, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 16 -; 686-O0-NEXT: popl %esi -; 686-O0-NEXT: .cfi_def_cfa_offset 12 -; 686-O0-NEXT: popl %edi -; 686-O0-NEXT: .cfi_def_cfa_offset 8 -; 686-O0-NEXT: popl %ebx ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl ; @@ -321,9 +306,9 @@ ; X86-O0-NEXT: sete %al ; X86-O0-NEXT: andb $1, %al ; X86-O0-NEXT: movzbl %al, %eax -; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax -; X86-O0-NEXT: # implicit-def: $rcx -; X86-O0-NEXT: movw %ax, (%rcx) +; X86-O0-NEXT: movw %ax, %cx +; X86-O0-NEXT: # implicit-def: $rax +; X86-O0-NEXT: movw %cx, (%rax) ; X86-O0-NEXT: retq ; ; X64-LABEL: f2: @@ -368,9 +353,9 @@ ; 686-O0-NEXT: sete %al ; 686-O0-NEXT: andb $1, %al ; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax -; 686-O0-NEXT: # implicit-def: $ecx -; 686-O0-NEXT: movw %ax, (%ecx) +; 686-O0-NEXT: movw %ax, %cx +; 686-O0-NEXT: # implicit-def: $eax +; 686-O0-NEXT: movw %cx, (%eax) ; 686-O0-NEXT: addl $2, %esp ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl @@ -488,18 +473,18 @@ ; 686-O0-NEXT: andl $-8, %esp ; 686-O0-NEXT: subl $16, %esp ; 686-O0-NEXT: .cfi_offset %esi, -12 -; 686-O0-NEXT: movl var_13, %eax -; 686-O0-NEXT: movl %eax, %ecx -; 686-O0-NEXT: notl %ecx -; 686-O0-NEXT: testl %eax, %eax -; 686-O0-NEXT: sete %al -; 686-O0-NEXT: movzbl %al, %eax -; 686-O0-NEXT: movl var_16, %edx -; 686-O0-NEXT: movl %ecx, %esi -; 686-O0-NEXT: xorl %edx, %esi -; 686-O0-NEXT: andl %esi, %eax -; 686-O0-NEXT: orl %eax, %ecx -; 686-O0-NEXT: movl %ecx, (%esp) +; 686-O0-NEXT: movl var_13, %ecx +; 686-O0-NEXT: movl %ecx, %eax +; 686-O0-NEXT: notl %eax +; 686-O0-NEXT: testl %ecx, %ecx +; 686-O0-NEXT: sete %cl +; 
686-O0-NEXT: movzbl %cl, %ecx +; 686-O0-NEXT: movl var_16, %esi +; 686-O0-NEXT: movl %eax, %edx +; 686-O0-NEXT: xorl %esi, %edx +; 686-O0-NEXT: andl %edx, %ecx +; 686-O0-NEXT: orl %ecx, %eax +; 686-O0-NEXT: movl %eax, (%esp) ; 686-O0-NEXT: movl $0, {{[0-9]+}}(%esp) ; 686-O0-NEXT: movl var_13, %eax ; 686-O0-NEXT: notl %eax Index: llvm/test/CodeGen/X86/pr32340.ll =================================================================== --- llvm/test/CodeGen/X86/pr32340.ll +++ llvm/test/CodeGen/X86/pr32340.ll @@ -16,26 +16,26 @@ ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: # kill: def $rax killed $eax ; X64-NEXT: movw $0, var_825 -; X64-NEXT: movzwl var_32, %ecx -; X64-NEXT: movzwl var_901, %edx -; X64-NEXT: movl %ecx, %esi -; X64-NEXT: xorl %edx, %esi -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: xorl %esi, %edx -; X64-NEXT: addl %ecx, %edx -; X64-NEXT: movslq %edx, %rcx +; X64-NEXT: movzwl var_32, %edx +; X64-NEXT: movzwl var_901, %ecx +; X64-NEXT: movl %edx, %esi +; X64-NEXT: xorl %ecx, %esi +; X64-NEXT: movl %edx, %ecx +; X64-NEXT: xorl %esi, %ecx +; X64-NEXT: addl %edx, %ecx +; X64-NEXT: movslq %ecx, %rcx ; X64-NEXT: movq %rcx, var_826 ; X64-NEXT: movzwl var_32, %ecx ; X64-NEXT: # kill: def $rcx killed $ecx ; X64-NEXT: movzwl var_901, %edx ; X64-NEXT: xorl $51981, %edx # imm = 0xCB0D -; X64-NEXT: movslq %edx, %rdx -; X64-NEXT: movabsq $-1142377792914660288, %rsi # imm = 0xF02575732E06E440 -; X64-NEXT: xorq %rsi, %rdx -; X64-NEXT: movq %rcx, %rsi +; X64-NEXT: movslq %edx, %rsi +; X64-NEXT: movabsq $-1142377792914660288, %rdx # imm = 0xF02575732E06E440 ; X64-NEXT: xorq %rdx, %rsi -; X64-NEXT: xorq $-1, %rsi -; X64-NEXT: xorq %rsi, %rcx +; X64-NEXT: movq %rcx, %rdx +; X64-NEXT: xorq %rsi, %rdx +; X64-NEXT: xorq $-1, %rdx +; X64-NEXT: xorq %rdx, %rcx ; X64-NEXT: movq %rcx, %rdx ; X64-NEXT: orq var_57, %rdx ; X64-NEXT: orq %rdx, %rcx Index: llvm/test/CodeGen/X86/pr32345.ll =================================================================== --- llvm/test/CodeGen/X86/pr32345.ll +++ 
llvm/test/CodeGen/X86/pr32345.ll @@ -29,9 +29,9 @@ ; X640-NEXT: # kill: def $rcx killed $ecx ; X640-NEXT: # kill: def $cl killed $rcx ; X640-NEXT: sarq %cl, %rax -; X640-NEXT: # kill: def $al killed $al killed $rax -; X640-NEXT: # implicit-def: $rcx -; X640-NEXT: movb %al, (%rcx) +; X640-NEXT: movb %al, %cl +; X640-NEXT: # implicit-def: $rax +; X640-NEXT: movb %cl, (%rax) ; X640-NEXT: retq ; ; 6860-LABEL: foo: @@ -43,44 +43,44 @@ ; 6860-NEXT: .cfi_def_cfa_register %ebp ; 6860-NEXT: andl $-8, %esp ; 6860-NEXT: subl $24, %esp -; 6860-NEXT: movw var_22, %ax +; 6860-NEXT: movw var_22, %dx ; 6860-NEXT: movzwl var_27, %ecx -; 6860-NEXT: movw %cx, %dx -; 6860-NEXT: xorw %dx, %ax -; 6860-NEXT: # implicit-def: $edx -; 6860-NEXT: movw %ax, %dx -; 6860-NEXT: xorl %ecx, %edx -; 6860-NEXT: # kill: def $dx killed $dx killed $edx -; 6860-NEXT: movzwl %dx, %eax +; 6860-NEXT: movw %cx, %ax +; 6860-NEXT: xorw %ax, %dx +; 6860-NEXT: # implicit-def: $eax +; 6860-NEXT: movw %dx, %ax +; 6860-NEXT: xorl %ecx, %eax +; 6860-NEXT: # kill: def $ax killed $ax killed $eax +; 6860-NEXT: movzwl %ax, %eax ; 6860-NEXT: movl %eax, {{[0-9]+}}(%esp) ; 6860-NEXT: movl $0, {{[0-9]+}}(%esp) -; 6860-NEXT: movw var_22, %ax -; 6860-NEXT: movzwl var_27, %ecx -; 6860-NEXT: movw %cx, %dx -; 6860-NEXT: xorw %dx, %ax -; 6860-NEXT: # implicit-def: $edx -; 6860-NEXT: movw %ax, %dx -; 6860-NEXT: xorl %ecx, %edx -; 6860-NEXT: # kill: def $dx killed $dx killed $edx -; 6860-NEXT: movzwl %dx, %eax -; 6860-NEXT: # kill: def $cl killed $cl killed $ecx +; 6860-NEXT: movw var_22, %dx +; 6860-NEXT: movzwl var_27, %eax +; 6860-NEXT: movw %ax, %cx +; 6860-NEXT: xorw %cx, %dx +; 6860-NEXT: # implicit-def: $ecx +; 6860-NEXT: movw %dx, %cx +; 6860-NEXT: xorl %eax, %ecx +; 6860-NEXT: # kill: def $cx killed $cx killed $ecx +; 6860-NEXT: movzwl %cx, %edx +; 6860-NEXT: movb %al, %cl ; 6860-NEXT: addb $30, %cl -; 6860-NEXT: xorl %edx, %edx ; 6860-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; 6860-NEXT: shrdl %cl, %edx, 
%eax +; 6860-NEXT: xorl %eax, %eax +; 6860-NEXT: shrdl %cl, %eax, %edx ; 6860-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload +; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: testb $32, %cl ; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: jne .LBB0_2 ; 6860-NEXT: # %bb.1: # %bb ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: .LBB0_2: # %bb ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; 6860-NEXT: # kill: def $al killed $al killed $eax -; 6860-NEXT: # implicit-def: $ecx -; 6860-NEXT: movb %al, (%ecx) +; 6860-NEXT: movb %al, %cl +; 6860-NEXT: # implicit-def: $eax +; 6860-NEXT: movb %cl, (%eax) ; 6860-NEXT: movl %ebp, %esp ; 6860-NEXT: popl %ebp ; 6860-NEXT: .cfi_def_cfa %esp, 4 Index: llvm/test/CodeGen/X86/pr32451.ll =================================================================== --- llvm/test/CodeGen/X86/pr32451.ll +++ llvm/test/CodeGen/X86/pr32451.ll @@ -9,24 +9,24 @@ define i8** @japi1_convert_690(i8**, i8***, i32) { ; CHECK-LABEL: japi1_convert_690: ; CHECK: # %bb.0: # %top -; CHECK-NEXT: subl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: subl $12, %esp +; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll julia.gc_root_decl ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll jl_get_ptls_states -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movl 4(%ecx), %edx -; CHECK-NEXT: movb (%edx), %dl -; CHECK-NEXT: andb $1, %dl -; CHECK-NEXT: movzbl %dl, %edx -; CHECK-NEXT: movl %edx, (%esp) -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: # kill: def $ecx killed $eax +; CHECK-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; CHECK-NEXT: movl 4(%eax), %eax +; CHECK-NEXT: movb (%eax), %al +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: movl %eax, (%esp) ; CHECK-NEXT: calll jl_box_int32 ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: movl %eax, (%ecx) -; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl top: Index: llvm/test/CodeGen/X86/pr32484.ll =================================================================== --- llvm/test/CodeGen/X86/pr32484.ll +++ llvm/test/CodeGen/X86/pr32484.ll @@ -8,9 +8,9 @@ ; CHECK-NEXT: jmpq *%rax ; CHECK-NEXT: .LBB0_1: ; CHECK-NEXT: xorps %xmm0, %xmm0 -; CHECK-NEXT: pcmpeqd %xmm1, %xmm1 +; CHECK-NEXT: pcmpeqd %xmm0, %xmm0 ; CHECK-NEXT: # implicit-def: $rax -; CHECK-NEXT: movdqu %xmm1, (%rax) +; CHECK-NEXT: movdqu %xmm0, (%rax) ; CHECK-NEXT: .LBB0_2: ; CHECK-NEXT: retq indirectbr i8* undef, [label %9, label %1] Index: llvm/test/CodeGen/X86/pr34592.ll =================================================================== --- llvm/test/CodeGen/X86/pr34592.ll +++ llvm/test/CodeGen/X86/pr34592.ll @@ -10,44 +10,42 @@ ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: andq $-32, %rsp -; CHECK-NEXT: subq $160, %rsp -; CHECK-NEXT: vmovaps 240(%rbp), %ymm8 -; CHECK-NEXT: vmovaps 208(%rbp), %ymm9 -; CHECK-NEXT: vmovaps 176(%rbp), %ymm10 -; CHECK-NEXT: vmovaps 144(%rbp), %ymm11 -; CHECK-NEXT: vmovaps 112(%rbp), %ymm12 -; CHECK-NEXT: vmovaps 80(%rbp), %ymm13 -; CHECK-NEXT: vmovaps 48(%rbp), %ymm14 -; CHECK-NEXT: vmovaps 16(%rbp), %ymm15 -; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3,4,5],ymm2[6,7] -; CHECK-NEXT: vmovaps %ymm9, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; CHECK-NEXT: vmovaps %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; CHECK-NEXT: # implicit-def: $ymm0 -; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0 -; CHECK-NEXT: vpalignr {{.*#+}} ymm2 = 
ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23] -; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,2,0] -; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5],ymm2[6,7] +; CHECK-NEXT: subq $32, %rsp +; CHECK-NEXT: vmovaps %ymm4, %ymm10 +; CHECK-NEXT: vmovaps %ymm3, %ymm9 +; CHECK-NEXT: vmovaps %ymm1, %ymm8 +; CHECK-NEXT: vmovaps %ymm0, %ymm4 +; CHECK-NEXT: vmovaps 240(%rbp), %ymm1 +; CHECK-NEXT: vmovaps 208(%rbp), %ymm3 +; CHECK-NEXT: vmovaps 176(%rbp), %ymm0 +; CHECK-NEXT: vmovaps 144(%rbp), %ymm0 +; CHECK-NEXT: vmovaps 112(%rbp), %ymm11 +; CHECK-NEXT: vmovaps 80(%rbp), %ymm11 +; CHECK-NEXT: vmovaps 48(%rbp), %ymm11 +; CHECK-NEXT: vmovaps 16(%rbp), %ymm11 +; CHECK-NEXT: vpblendd {{.*#+}} ymm4 = ymm6[0,1,2,3,4,5],ymm2[6,7] +; CHECK-NEXT: vmovaps %xmm3, %xmm8 +; CHECK-NEXT: # implicit-def: $ymm2 +; CHECK-NEXT: vinserti128 $1, %xmm8, %ymm2, %ymm2 +; CHECK-NEXT: vpalignr {{.*#+}} ymm0 = ymm4[8,9,10,11,12,13,14,15],ymm0[0,1,2,3,4,5,6,7],ymm4[24,25,26,27,28,29,30,31],ymm0[16,17,18,19,20,21,22,23] +; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,2,0] +; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm2[4,5],ymm0[6,7] ; CHECK-NEXT: vmovaps %xmm7, %xmm2 -; CHECK-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] -; CHECK-NEXT: # implicit-def: $ymm9 -; CHECK-NEXT: vmovaps %xmm2, %xmm9 -; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload -; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] -; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3] -; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm11[4,5,6,7] -; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7] -; CHECK-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,1,1,3] -; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5] -; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = 
ymm8[0,1,2,3,4,5],ymm5[6,7] -; CHECK-NEXT: vextracti128 $1, %ymm7, %xmm7 -; CHECK-NEXT: vmovq {{.*#+}} xmm7 = xmm7[0],zero -; CHECK-NEXT: # implicit-def: $ymm8 -; CHECK-NEXT: vmovaps %xmm7, %xmm8 -; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm8[0,1],ymm6[0,1] -; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; CHECK-NEXT: vmovaps %ymm5, %ymm1 -; CHECK-NEXT: vmovaps %ymm3, (%rsp) # 32-byte Spill -; CHECK-NEXT: vmovaps %ymm9, %ymm3 +; CHECK-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] +; CHECK-NEXT: # implicit-def: $ymm2 +; CHECK-NEXT: vmovaps %xmm4, %xmm2 +; CHECK-NEXT: vpalignr {{.*#+}} ymm3 = ymm3[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm3[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] +; CHECK-NEXT: vpermq {{.*#+}} ymm3 = ymm3[0,1,0,3] +; CHECK-NEXT: vpblendd {{.*#+}} ymm3 = ymm2[0,1,2,3],ymm3[4,5,6,7] +; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm7[0,1],ymm1[2,3],ymm7[4,5,6,7] +; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[2,1,1,3] +; CHECK-NEXT: vpshufd {{.*#+}} ymm2 = ymm5[0,1,0,1,4,5,4,5] +; CHECK-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3,4,5],ymm2[6,7] +; CHECK-NEXT: vextracti128 $1, %ymm7, %xmm2 +; CHECK-NEXT: vmovq {{.*#+}} xmm4 = xmm2[0],zero +; CHECK-NEXT: # implicit-def: $ymm2 +; CHECK-NEXT: vmovaps %xmm4, %xmm2 +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm2[0,1],ymm6[0,1] ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 Index: llvm/test/CodeGen/X86/pr34653.ll =================================================================== --- llvm/test/CodeGen/X86/pr34653.ll +++ llvm/test/CodeGen/X86/pr34653.ll @@ -12,52 +12,46 @@ ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: andq $-512, %rsp # imm = 0xFE00 -; CHECK-NEXT: subq $1536, %rsp # imm = 0x600 -; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; CHECK-NEXT: subq $1024, %rsp # imm = 0x400 +; CHECK-NEXT: movq %rsp, %rdi ; CHECK-NEXT: callq test ; CHECK-NEXT: 
vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm4 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm5 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm6 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm7 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm8 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm9 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm10 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm11 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm12 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm13 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm14 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm15 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm16 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm17 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm18 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm19 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm20 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm21 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm22 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm23 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm24 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm25 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm26 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm27 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm28 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm29 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm30 = mem[0],zero -; CHECK-NEXT: vmovsd {{.*#+}} xmm31 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd 
{{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp Index: llvm/test/CodeGen/X86/pr39733.ll =================================================================== --- llvm/test/CodeGen/X86/pr39733.ll +++ llvm/test/CodeGen/X86/pr39733.ll @@ -17,13 +17,13 @@ ; CHECK-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 ; CHECK-NEXT: vmovdqa %xmm0, {{[0-9]+}}(%rsp) -; CHECK-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm0 -; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1 -; CHECK-NEXT: # implicit-def: $ymm2 -; CHECK-NEXT: vmovaps %xmm1, %xmm2 -; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; CHECK-NEXT: vmovdqa {{[0-9]+}}(%rsp), %xmm1 +; CHECK-NEXT: vpmovsxwd %xmm1, %xmm2 +; CHECK-NEXT: # implicit-def: $ymm0 +; CHECK-NEXT: vmovaps %xmm2, %xmm0 +; CHECK-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3] +; CHECK-NEXT: vpmovsxwd %xmm1, %xmm1 +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 ; CHECK-NEXT: vmovdqa %ymm0, (%rsp) ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp Index: llvm/test/CodeGen/X86/pr42452.ll =================================================================== --- llvm/test/CodeGen/X86/pr42452.ll +++ llvm/test/CodeGen/X86/pr42452.ll @@ -6,12 +6,12 @@ define void @foo(i1 %c, <2 x i64> %x) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $dil killed $dil killed $edi +; CHECK-NEXT: movb %dil, %al +; CHECK-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: movq %xmm0, %rax ; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] -; CHECK-NEXT: movq %xmm0, %rcx -; CHECK-NEXT: movb %dil, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill -; CHECK-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-NEXT: movq %xmm0, %rax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; 
CHECK-NEXT: .LBB0_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: movb {{[-0-9]+}}(%r{{[sb]}}p), %al # 1-byte Reload Index: llvm/test/CodeGen/X86/pr44749.ll =================================================================== --- llvm/test/CodeGen/X86/pr44749.ll +++ llvm/test/CodeGen/X86/pr44749.ll @@ -4,33 +4,29 @@ define i32 @a() { ; CHECK-LABEL: a: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: xorl %eax, %eax ; CHECK-NEXT: ## kill: def $al killed $al killed $eax -; CHECK-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: callq _b ; CHECK-NEXT: cvtsi2sd %eax, %xmm0 ; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rax ; CHECK-NEXT: subq $-1, %rax -; CHECK-NEXT: setne %cl -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $rcx killed $ecx -; CHECK-NEXT: leaq {{.*}}(%rip), %rdx +; CHECK-NEXT: setne %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: leaq {{.*}}(%rip), %rax ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; CHECK-NEXT: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: setae %cl -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: ## kill: def $rcx killed $ecx -; CHECK-NEXT: leaq {{.*}}(%rip), %rdx +; CHECK-NEXT: setae %al +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: leaq {{.*}}(%rip), %rax ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: cvttsd2si %xmm0, %ecx -; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill -; CHECK-NEXT: movl %ecx, %eax -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: cvttsd2si %xmm0, %eax +; CHECK-NEXT: popq %rcx ; CHECK-NEXT: retq entry: %call = call i32 (...) 
@b() Index: llvm/test/CodeGen/X86/pr47000.ll =================================================================== --- llvm/test/CodeGen/X86/pr47000.ll +++ llvm/test/CodeGen/X86/pr47000.ll @@ -12,124 +12,124 @@ ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi ; CHECK-NEXT: subl $124, %esp -; CHECK-NEXT: movl 144(%esp), %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: movw 176(%esp), %dx -; CHECK-NEXT: movw 172(%esp), %si -; CHECK-NEXT: movw 168(%esp), %di -; CHECK-NEXT: movw 164(%esp), %bx -; CHECK-NEXT: movw 160(%esp), %bp +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movw 156(%esp), %ax -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: movw 152(%esp), %ax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %si +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %dx +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %cx +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %ax ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: movw 148(%esp), %ax -; CHECK-NEXT: movw %ax, 112(%esp) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload -; CHECK-NEXT: movw %ax, 114(%esp) +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %di +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bx +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %bp +; CHECK-NEXT: movw {{[0-9]+}}(%esp), %ax +; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload -; CHECK-NEXT: movw %ax, 116(%esp) -; CHECK-NEXT: movw %bp, 118(%esp) -; CHECK-NEXT: movw %dx, 110(%esp) -; CHECK-NEXT: movw %si, 108(%esp) -; CHECK-NEXT: movw %di, 106(%esp) -; CHECK-NEXT: movw %bx, 104(%esp) -; CHECK-NEXT: movzwl 118(%esp), %edx -; CHECK-NEXT: movzwl 116(%esp), %esi -; CHECK-NEXT: movzwl 114(%esp), %edi -; CHECK-NEXT: movzwl 112(%esp), %ebx -; CHECK-NEXT: movzwl 110(%esp), %ebp -; CHECK-NEXT: movzwl 108(%esp), %eax +; CHECK-NEXT: movw %bp, {{[0-9]+}}(%esp) 
+; CHECK-NEXT: movw %bx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %di, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %dx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movzwl 106(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movzwl 104(%esp), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl %ebx, (%eax) -; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; CHECK-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: movl %esp, 
%eax +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstps 4(%eax) -; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll fmodf ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll __gnu_f2h_ieee -; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: calll __gnu_h2f_ieee ; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstps 4(%eax) -; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll fmodf ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll __gnu_f2h_ieee -; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movw %ax, %si +; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl %ecx, (%eax) ; 
CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstps 4(%eax) -; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll fmodf ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll __gnu_f2h_ieee -; CHECK-NEXT: movl %esp, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: movl %edx, (%ecx) -; CHECK-NEXT: movw %ax, {{[-0-9]+}}(%e{{[sb]}}p) # 2-byte Spill -; CHECK-NEXT: calll __gnu_h2f_ieee -; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movw %ax, %di +; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: movl %ecx, (%eax) +; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill +; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: movl %ecx, (%eax) ; CHECK-NEXT: calll __gnu_h2f_ieee +; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: movl %esp, %eax +; CHECK-NEXT: fxch %st(1) ; CHECK-NEXT: fstps 4(%eax) -; CHECK-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll fmodf ; CHECK-NEXT: movl %esp, %eax ; CHECK-NEXT: fstps (%eax) ; CHECK-NEXT: calll __gnu_f2h_ieee -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; CHECK-NEXT: movw %ax, 6(%ecx) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %ax # 2-byte Reload -; CHECK-NEXT: movw %ax, 4(%ecx) ; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %dx # 2-byte Reload -; CHECK-NEXT: movw %dx, 2(%ecx) -; CHECK-NEXT: movw {{[-0-9]+}}(%e{{[sb]}}p), %si # 2-byte Reload -; CHECK-NEXT: movw %si, (%ecx) +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: movw %ax, %bx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), 
%eax # 4-byte Reload +; CHECK-NEXT: movw %bx, 6(%ecx) +; CHECK-NEXT: movw %di, 4(%ecx) +; CHECK-NEXT: movw %si, 2(%ecx) +; CHECK-NEXT: movw %dx, (%ecx) ; CHECK-NEXT: addl $124, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi Index: llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir =================================================================== --- llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir +++ llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir @@ -4,7 +4,7 @@ # Bug 41973. Make sure %12 is detected as live out of %bb.0, even # though the use is allocated before the def block %bb.3. Previously # mayLiveOut only recorded on defs, and would not find the virtual -# register use if it had already been replace with a physical +# register use if it had already been replaced with a physical # register. --- @@ -21,11 +21,11 @@ ; CHECK: successors: ; CHECK: bb.2: ; CHECK: successors: %bb.3(0x80000000) - ; CHECK: $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) - ; CHECK: renamable $ecx = MOV32r0 implicit-def $eflags - ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit - ; CHECK: MOV64mi32 killed renamable $rax, 1, $noreg, 0, $noreg, 0 :: (volatile store 8) - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.0) + ; CHECK: $rcx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) + ; CHECK: renamable $eax = MOV32r0 implicit-def dead $eflags + ; CHECK: renamable $rax = SUBREG_TO_REG 0, killed renamable $eax, %subreg.sub_32bit + ; CHECK: MOV64mi32 killed renamable $rcx, 1, $noreg, 0, $noreg, 0 :: (volatile store 8) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rax :: (store 8 into %stack.0) ; CHECK: bb.3: ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) @@ -46,7 +46,6 @@ bb.1: successors: - bb.2: %0:gr64 = COPY %12 
%10:gr32 = MOV32r0 implicit-def $eflags Index: llvm/test/CodeGen/X86/stack-protector-msvc.ll =================================================================== --- llvm/test/CodeGen/X86/stack-protector-msvc.ll +++ llvm/test/CodeGen/X86/stack-protector-msvc.ll @@ -48,9 +48,8 @@ ; MSVC-X86-O0: xorl %esp, %[[REG1]] ; MSVC-X86-O0: movl %[[REG1]], [[SLOT:[0-9]*]](%esp) ; MSVC-X86-O0: calll _strcpy -; MSVC-X86-O0: movl [[SLOT]](%esp), %[[REG1:[^ ]*]] -; MSVC-X86-O0: xorl %esp, %[[REG1]] -; MSVC-X86-O0: movl %[[REG1]], %ecx +; MSVC-X86-O0: movl [[SLOT]](%esp), %ecx +; MSVC-X86-O0: xorl %esp, %ecx ; MSVC-X86-O0: calll @__security_check_cookie@4 ; MSVC-X86-O0: retl @@ -59,9 +58,8 @@ ; MSVC-X64-O0: xorq %rsp, %[[REG1]] ; MSVC-X64-O0: movq %[[REG1]], [[SLOT:[0-9]*]](%rsp) ; MSVC-X64-O0: callq strcpy -; MSVC-X64-O0: movq [[SLOT]](%rsp), %[[REG1:[^ ]*]] -; MSVC-X64-O0: xorq %rsp, %[[REG1]] -; MSVC-X64-O0: movq %[[REG1]], %rcx +; MSVC-X64-O0: movq [[SLOT]](%rsp), %rcx +; MSVC-X64-O0: xorq %rsp, %rcx ; MSVC-X64-O0: callq __security_check_cookie ; MSVC-X64-O0: retq Index: llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll =================================================================== --- llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll +++ llvm/test/CodeGen/X86/stack-protector-strong-macho-win32-xor.ll @@ -14,7 +14,7 @@ ; CHECK-NEXT: .cfi_offset %rbp, -16 ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp -; CHECK-NEXT: subq $336, %rsp ## imm = 0x150 +; CHECK-NEXT: subq $320, %rsp ## imm = 0x140 ; CHECK-NEXT: movq ___security_cookie@{{.*}}(%rip), %rax ; CHECK-NEXT: movq (%rax), %rax ; CHECK-NEXT: movq %rax, -8(%rbp) @@ -25,10 +25,9 @@ ; CHECK-NEXT: leaq {{.*}}(%rip), %rcx ; CHECK-NEXT: callq _printf ; CHECK-NEXT: movq -8(%rbp), %rcx -; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: callq ___security_check_cookie ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: addq $336, %rsp ## imm = 0x150 +; 
CHECK-NEXT: addq $320, %rsp ## imm = 0x140 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq entry: Index: llvm/test/CodeGen/X86/swift-return.ll =================================================================== --- llvm/test/CodeGen/X86/swift-return.ll +++ llvm/test/CodeGen/X86/swift-return.ll @@ -79,16 +79,15 @@ ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: movq %rsp, %rax ; CHECK-O0-NEXT: callq gen2 -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edx -; CHECK-O0-NEXT: movl (%rsp), %esi +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %esi +; CHECK-O0-NEXT: movl (%rsp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi -; CHECK-O0-NEXT: addl %edi, %esi -; CHECK-O0-NEXT: addl %edx, %esi -; CHECK-O0-NEXT: addl %ecx, %esi -; CHECK-O0-NEXT: addl %eax, %esi -; CHECK-O0-NEXT: movl %esi, %eax +; CHECK-O0-NEXT: addl %edi, %eax +; CHECK-O0-NEXT: addl %esi, %eax +; CHECK-O0-NEXT: addl %edx, %eax +; CHECK-O0-NEXT: addl %ecx, %eax ; CHECK-O0-NEXT: addq $24, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -418,10 +417,10 @@ ; ; CHECK-O0-LABEL: gen7: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movl %edi, %eax -; CHECK-O0-NEXT: movl %edi, %edx -; CHECK-O0-NEXT: movl %edi, %ecx ; CHECK-O0-NEXT: movl %edi, %r8d +; CHECK-O0-NEXT: movl %r8d, %eax +; CHECK-O0-NEXT: movl %r8d, %edx +; CHECK-O0-NEXT: movl %r8d, %ecx ; CHECK-O0-NEXT: retq %v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0 %v1 = insertvalue { i32, i32, i32, i32 } %v0, i32 %key, 1 @@ -441,10 +440,10 @@ ; ; CHECK-O0-LABEL: gen8: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movq %rdi, %rax -; CHECK-O0-NEXT: movq %rdi, %rdx -; CHECK-O0-NEXT: movq %rdi, %rcx ; CHECK-O0-NEXT: movq %rdi, %r8 +; CHECK-O0-NEXT: movq %r8, %rax +; CHECK-O0-NEXT: movq %r8, %rdx +; CHECK-O0-NEXT: movq %r8, %rcx ; CHECK-O0-NEXT: retq %v0 = insertvalue { i64, i64, i64, i64 } undef, i64 %key, 0 %v1 = insertvalue { i64, i64, i64, i64 } %v0, i64 %key, 
1 @@ -464,11 +463,10 @@ ; ; CHECK-O0-LABEL: gen9: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: # kill: def $dil killed $dil killed $edi -; CHECK-O0-NEXT: movb %dil, %al -; CHECK-O0-NEXT: movb %dil, %dl -; CHECK-O0-NEXT: movb %dil, %cl ; CHECK-O0-NEXT: movb %dil, %r8b +; CHECK-O0-NEXT: movb %r8b, %al +; CHECK-O0-NEXT: movb %r8b, %dl +; CHECK-O0-NEXT: movb %r8b, %cl ; CHECK-O0-NEXT: retq %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 %key, 0 %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1 @@ -490,17 +488,14 @@ ; ; CHECK-O0-LABEL: gen10: ; CHECK-O0: # %bb.0: -; CHECK-O0-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload -; CHECK-O0-NEXT: # xmm1 = mem[0],zero -; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 8-byte Reload -; CHECK-O0-NEXT: # xmm2 = mem[0],zero -; CHECK-O0-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm3 # 8-byte Reload -; CHECK-O0-NEXT: # xmm3 = mem[0],zero -; CHECK-O0-NEXT: movq %rdi, %rax -; CHECK-O0-NEXT: movq %rdi, %rdx -; CHECK-O0-NEXT: movq %rdi, %rcx ; CHECK-O0-NEXT: movq %rdi, %r8 +; CHECK-O0-NEXT: movaps %xmm0, %xmm3 +; CHECK-O0-NEXT: movaps %xmm3, %xmm0 +; CHECK-O0-NEXT: movaps %xmm3, %xmm1 +; CHECK-O0-NEXT: movaps %xmm3, %xmm2 +; CHECK-O0-NEXT: movq %r8, %rax +; CHECK-O0-NEXT: movq %r8, %rdx +; CHECK-O0-NEXT: movq %r8, %rcx ; CHECK-O0-NEXT: retq %v0 = insertvalue { double, double, double, double, i64, i64, i64, i64 } undef, double %keyd, 0 %v1 = insertvalue { double, double, double, double, i64, i64, i64, i64 } %v0, double %keyd, 1 @@ -569,13 +564,15 @@ ; ; CHECK-O0-LABEL: test12: ; CHECK-O0: # %bb.0: # %entry -; CHECK-O0-NEXT: pushq %rax -; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: subq $24, %rsp +; CHECK-O0-NEXT: .cfi_def_cfa_offset 32 ; CHECK-O0-NEXT: callq gen12 -; CHECK-O0-NEXT: addps %xmm1, %xmm0 -; CHECK-O0-NEXT: addps %xmm2, %xmm0 +; CHECK-O0-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill ; CHECK-O0-NEXT: movaps %xmm3, %xmm1 -; CHECK-O0-NEXT: 
popq %rax +; CHECK-O0-NEXT: movaps (%rsp), %xmm3 # 16-byte Reload +; CHECK-O0-NEXT: addps %xmm3, %xmm0 +; CHECK-O0-NEXT: addps %xmm2, %xmm0 +; CHECK-O0-NEXT: addq $24, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq entry: Index: llvm/test/CodeGen/X86/swifterror.ll =================================================================== --- llvm/test/CodeGen/X86/swifterror.ll +++ llvm/test/CodeGen/X86/swifterror.ll @@ -18,8 +18,9 @@ ; CHECK-O0-LABEL: foo: ; CHECK-O0: movl $16 ; CHECK-O0: malloc -; CHECK-O0: movb $1, 8(%rax) ; CHECK-O0: movq %{{.*}}, %r12 +; CHECK-O0: movb $1, 8(%rax) + entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -121,19 +122,17 @@ ; CHECK-APPLE: ret ; CHECK-O0-LABEL: foo_if: -; CHECK-O0: cmpl $0 ; spill to stack ; CHECK-O0: movq %r12, {{.*}}(%rsp) +; CHECK-O0: cmpl $0 ; CHECK-O0: je ; CHECK-O0: movl $16, ; CHECK-O0: malloc -; CHECK-O0: movq %rax, [[ID:%[a-z]+]] +; CHECK-O0: movq %rax, %r12 ; CHECK-O0-DAG: movb $1, 8(%rax) -; CHECK-O0-DAG: movq [[ID]], %r12 ; CHECK-O0: ret ; reload from stack -; CHECK-O0: movq {{.*}}(%rsp), [[REG:%[a-z]+]] -; CHECK-O0: movq [[REG]], %r12 +; CHECK-O0: movq {{.*}}(%rsp), %r12 ; CHECK-O0: ret entry: %cond = icmp ne i32 %cc, 0 @@ -177,8 +176,7 @@ ; CHECK-O0: movb $1, 8([[ID]]) ; CHECK-O0: jbe ; reload from stack -; CHECK-O0: movq {{.*}}(%rsp), [[REG:%[a-z0-9]+]] -; CHECK-O0: movq [[REG]], %r12 +; CHECK-O0: movq {{.*}}(%rsp), %r12 ; CHECK-O0: ret entry: br label %bb_loop @@ -218,16 +216,15 @@ ; CHECK-APPLE-NOT: x19 ; CHECK-O0-LABEL: foo_sret: -; CHECK-O0: movl $16, ; spill sret to stack ; CHECK-O0: movq %rdi, -; CHECK-O0: movq {{.*}}, %rdi +; CHECK-O0: movl $16, ; CHECK-O0: malloc -; CHECK-O0: movb $1, 8(%rax) -; CHECK-O0: movl %{{.*}}, 4(%{{.*}}) -; CHECK-O0: movq %{{.*}}, %r12 ; reload sret from stack ; CHECK-O0: movq {{.*}}(%rsp), %rax +; CHECK-O0: movq %{{.*}}, %r12 +; CHECK-O0: movb $1, 8(%rcx) +; CHECK-O0: movl %{{.*}}, 4(%{{.*}}) ; CHECK-O0: ret entry: %call 
= call i8* @malloc(i64 16) @@ -256,8 +253,8 @@ ; CHECK-O0-LABEL: caller3: ; CHECK-O0: xorl ; CHECK-O0: movl {{.*}}, %r12d +; CHECK-O0: leaq {{.*}}, %rdi ; CHECK-O0: movl $1, %esi -; CHECK-O0: movq {{.*}}, %rdi ; CHECK-O0: callq {{.*}}foo_sret ; CHECK-O0: movq %r12, ; CHECK-O0: cmpq $0 @@ -387,8 +384,9 @@ ; CHECK-O0-LABEL: foo_swiftcc: ; CHECK-O0: movl $16 ; CHECK-O0: malloc -; CHECK-O0: movb $1, 8(%rax) ; CHECK-O0: movq %{{.*}}, %r12 +; CHECK-O0: movb $1, 8(%rax) + entry: %call = call i8* @malloc(i64 16) %call.0 = bitcast i8* %call to %swift_error* @@ -435,19 +433,17 @@ ; CHECK-O0-LABEL: conditionally_forward_swifterror: ; CHECK-O0: pushq [[REG1:%[a-z0-9]+]] -; CHECK-O0: cmpl $0, %edi ; CHECK-O0-DAG: movq %r12, (%rsp) +; CHECK-O0: cmpl $0, %edi ; CHECK-O0: je -; CHECK-O0: movq (%rsp), [[REG:%[a-z0-9]+]] -; CHECK-O0: movq [[REG]], %r12 +; CHECK-O0: movq (%rsp), %r12 ; CHECK-O0: callq _moo ; CHECK-O0: popq [[REG1]] ; CHECK-O0: retq -; CHECK-O0: movq (%rsp), [[REG:%[a-z0-9]+]] +; CHECK-O0: movq (%rsp), %r12 ; CHECK-O0: xorps %xmm0, %xmm0 -; CHECK-O0: movq [[REG]], %r12 ; CHECK-O0: popq [[REG1]] ; CHECK-O0: retq entry: @@ -745,10 +741,9 @@ ; CHECK-O0-LABEL: testAssign2 ; CHECK-O0: movq %r12, [[SLOT:[-a-z0-9\(\)\%]*]] ; CHECK-O0: jmp -; CHECK-O0: movq [[SLOT]], %rax -; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT2]], %r12 -; CHECK-O0: retq +; CHECK-O0: movq [[SLOT]], %r12 +; CHECK-O0-NEXT: movq %r12, %rax +; CHECK-O0-NEXT: retq ; CHECK-APPLE-LABEL: testAssign2 ; CHECK-APPLE: movq %r12, %rax @@ -765,11 +760,10 @@ ; CHECK-O0-LABEL: testAssign3 ; CHECK-O0: callq _foo2 ; CHECK-O0: movq %r12, [[SLOT:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT]], %rax -; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT2]], %r12 -; CHECK-O0: addq $24, %rsp -; CHECK-O0: retq +; CHECK-O0: movq [[SLOT]], %r12 +; CHECK-O0-NEXT: movq %r12, %rax +; CHECK-O0-NEXT: popq %rcx +; CHECK-O0-NEXT: retq ; CHECK-APPLE-LABEL: testAssign3 ; CHECK-APPLE: callq 
_foo2 @@ -792,10 +786,10 @@ ; CHECK-O0: xorl %eax, %eax ; CHECK-O0: ## kill: def $rax killed $eax ; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT]], %rax -; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] -; CHECK-O0: movq [[SLOT2]], %r12 -; CHECK-O0: retq +; CHECK-O0: movq [[SLOT]], %r12 +; CHECK-O0-NEXT: movq %r12, %rax +; CHECK-O0-NEXT: popq %rcx +; CHECK-O0-NEXT: retq ; CHECK-APPLE-LABEL: testAssign4 ; CHECK-APPLE: callq _foo2 Index: llvm/test/CodeGen/X86/volatile.ll =================================================================== --- llvm/test/CodeGen/X86/volatile.ll +++ llvm/test/CodeGen/X86/volatile.ll @@ -5,23 +5,14 @@ @x = external global double define void @foo() nounwind { -; OPT-LABEL: foo: -; OPT: # %bb.0: -; OPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; OPT-NEXT: xorps %xmm0, %xmm0 -; OPT-NEXT: movsd %xmm0, x -; OPT-NEXT: movsd %xmm0, x -; OPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; OPT-NEXT: retl -; -; NOOPT-LABEL: foo: -; NOOPT: # %bb.0: -; NOOPT-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; NOOPT-NEXT: xorps %xmm1, %xmm1 -; NOOPT-NEXT: movsd %xmm1, x -; NOOPT-NEXT: movsd %xmm1, x -; NOOPT-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; NOOPT-NEXT: retl +; ALL-LABEL: foo: +; ALL: # %bb.0: +; ALL-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: xorps %xmm0, %xmm0 +; ALL-NEXT: movsd %xmm0, x +; ALL-NEXT: movsd %xmm0, x +; ALL-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; ALL-NEXT: retl %a = load volatile double, double* @x store volatile double 0.0, double* @x store volatile double 0.0, double* @x Index: llvm/test/CodeGen/X86/win64_eh.ll =================================================================== --- llvm/test/CodeGen/X86/win64_eh.ll +++ llvm/test/CodeGen/X86/win64_eh.ll @@ -82,11 +82,11 @@ } ; WIN64-LABEL: foo3: ; WIN64: .seh_proc foo3 -; NORM: subq $24, %rsp -; ATOM: leaq -24(%rsp), %rsp -; WIN64: .seh_stackalloc 24 +; NORM: subq $16, %rsp +; ATOM: leaq -16(%rsp), %rsp +; WIN64: .seh_stackalloc 16 ; WIN64: 
.seh_endprologue -; WIN64: addq $24, %rsp +; WIN64: addq $16, %rsp ; WIN64: ret ; WIN64: .seh_endproc Index: llvm/test/CodeGen/X86/x86-32-intrcc.ll =================================================================== --- llvm/test/CodeGen/X86/x86-32-intrcc.ll +++ llvm/test/CodeGen/X86/x86-32-intrcc.ll @@ -42,9 +42,9 @@ ; CHECK0-LABEL: test_isr_ecode ; CHECK0: pushl %ecx ; CHECK0: pushl %eax - ; CHECK0: movl 8(%esp), %eax - ; CHECK0: leal 12(%esp), %ecx - ; CHECK0: movl 8(%ecx), %ecx + ; CHECK0: movl 8(%esp), %ecx + ; CHECK0: leal 12(%esp), %eax + ; CHECK0: movl 8(%eax), %eax ; CHECK0: popl %eax ; CHECK0: popl %ecx ; CHECK0: addl $4, %esp Index: llvm/test/CodeGen/X86/x86-64-intrcc.ll =================================================================== --- llvm/test/CodeGen/X86/x86-64-intrcc.ll +++ llvm/test/CodeGen/X86/x86-64-intrcc.ll @@ -43,9 +43,9 @@ ; CHECK0: pushq %rax ; CHECK0: pushq %rax ; CHECK0: pushq %rcx - ; CHECK0: movq 24(%rsp), %rax - ; CHECK0: leaq 32(%rsp), %rcx - ; CHECK0: movq 16(%rcx), %rcx + ; CHECK0: movq 24(%rsp), %rcx + ; CHECK0: leaq 32(%rsp), %rax + ; CHECK0: movq 16(%rax), %rax ; CHECK0: popq %rcx ; CHECK0: popq %rax ; CHECK0: addq $16, %rsp Index: llvm/test/DebugInfo/AArch64/frameindices.ll =================================================================== --- llvm/test/DebugInfo/AArch64/frameindices.ll +++ llvm/test/DebugInfo/AArch64/frameindices.ll @@ -5,7 +5,7 @@ ; CHECK: DW_TAG_inlined_subroutine ; CHECK: "_Z3f111A" ; CHECK: DW_TAG_formal_parameter -; CHECK: DW_AT_location [DW_FORM_block1] (DW_OP_piece 0x1, DW_OP_fbreg -47, DW_OP_piece 0xf, DW_OP_piece 0x1, DW_OP_fbreg -54, DW_OP_piece 0x7) +; CHECK: DW_AT_location [DW_FORM_block1] (DW_OP_piece 0x1, DW_OP_fbreg -47, DW_OP_piece 0xf, DW_OP_piece 0x1, DW_OP_breg31 WSP+42, DW_OP_piece 0x7) ; CHECK: DW_AT_abstract_origin {{.*}} "p1" ; ; long a; Index: llvm/test/DebugInfo/AArch64/prologue_end.ll =================================================================== --- 
llvm/test/DebugInfo/AArch64/prologue_end.ll +++ llvm/test/DebugInfo/AArch64/prologue_end.ll @@ -9,9 +9,8 @@ define void @prologue_end_test() nounwind uwtable !dbg !4 { ; CHECK: prologue_end_test: ; CHECK: .cfi_startproc - ; CHECK: sub sp, sp ; CHECK: stp x29, x30 - ; CHECK: add x29, sp + ; CHECK: mov x29, sp ; CHECK: .loc 1 3 3 prologue_end ; CHECK: bl _func ; CHECK: bl _func Index: llvm/test/DebugInfo/ARM/prologue_end.ll =================================================================== --- llvm/test/DebugInfo/ARM/prologue_end.ll +++ llvm/test/DebugInfo/ARM/prologue_end.ll @@ -11,7 +11,6 @@ ; CHECK: prologue_end_test: ; CHECK: push {r7, lr} ; CHECK: {{mov r7, sp|add r7, sp}} - ; CHECK: sub sp ; CHECK: .loc 1 3 3 prologue_end ; CHECK: bl {{_func|Ltmp}} ; CHECK: bl {{_func|Ltmp}} Index: llvm/test/DebugInfo/Mips/delay-slot.ll =================================================================== --- llvm/test/DebugInfo/Mips/delay-slot.ll +++ llvm/test/DebugInfo/Mips/delay-slot.ll @@ -14,10 +14,10 @@ ; CHECK: ------------------ ------ ------ ------ --- ------------- ------------- ; CHECK: 0x0000000000000000 1 0 1 0 0 is_stmt ; CHECK: 0x0000000000000004 2 0 1 0 0 is_stmt prologue_end -; CHECK: 0x0000000000000024 3 0 1 0 0 is_stmt -; CHECK: 0x0000000000000034 4 0 1 0 0 is_stmt +; CHECK: 0x0000000000000020 3 0 1 0 0 is_stmt +; CHECK: 0x0000000000000030 4 0 1 0 0 is_stmt ; CHECK: 0x0000000000000048 5 0 1 0 0 is_stmt -; CHECK: 0x0000000000000058 5 0 1 0 0 is_stmt end_sequence +; CHECK: 0x0000000000000050 5 0 1 0 0 is_stmt end_sequence target datalayout = "E-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64" Index: llvm/test/DebugInfo/Mips/prologue_end.ll =================================================================== --- llvm/test/DebugInfo/Mips/prologue_end.ll +++ llvm/test/DebugInfo/Mips/prologue_end.ll @@ -30,7 +30,7 @@ ; PIC: addiu $[[R0]], $[[R0]], %lo(_gp_disp) ; PIC: addiu $sp, $sp, -{{[0-9]+}} ; PIC: sw $ra, {{[0-9]+}}($sp) -; PIC: addu $[[R1:[0-9]+]], $[[R0]], $25 +; 
PIC: addu $[[R1:[0-9]+|gp]], $[[R0]], $25 ; PIC: .loc 1 2 3 prologue_end ; PIC: lw $[[R2:[0-9]+]], %got($.str)($[[R1]]) @@ -40,7 +40,7 @@ ; PIC-FP: sw $ra, {{[0-9]+}}($sp) ; PIC-FP: sw $fp, {{[0-9]+}}($sp) ; PIC-FP: move $fp, $sp -; PIC-FP: addu $[[R1:[0-9]+]], $[[R0]], $25 +; PIC-FP: addu $[[R1:[0-9]+|gp]], $[[R0]], $25 ; PIC-FP: .loc 1 2 3 prologue_end ; PIC-FP: lw $[[R2:[0-9]+]], %got($.str)($[[R1]]) Index: llvm/test/DebugInfo/X86/dbg-declare-arg.ll =================================================================== --- llvm/test/DebugInfo/X86/dbg-declare-arg.ll +++ llvm/test/DebugInfo/X86/dbg-declare-arg.ll @@ -20,7 +20,7 @@ ; CHECK: DW_AT_name {{.*}}"j" ; CHECK: DW_TAG_variable ; CHECK-NEXT: DW_AT_location [DW_FORM_sec_offset] ( -; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): DW_OP_breg7 RSP+16, DW_OP_deref) +; CHECK-NEXT: [0x{{.*}}, 0x{{.*}}): DW_OP_breg7 RSP+8, DW_OP_deref) ; CHECK-NEXT: DW_AT_name {{.*}}"my_a" %class.A = type { i32, i32, i32, i32 } Index: llvm/test/DebugInfo/X86/fission-ranges.ll =================================================================== --- llvm/test/DebugInfo/X86/fission-ranges.ll +++ llvm/test/DebugInfo/X86/fission-ranges.ll @@ -10,11 +10,11 @@ ; LiveDebugValues should produce DBG_VALUEs for variable "b" in successive ; blocks once we recognize that it is spilled. 
; CHECK-MIR: ![[BDIVAR:[0-9]+]] = !DILocalVariable(name: "b" -; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus) +; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 24, DW_OP_minus) ; CHECK-MIR-LABEL: bb.6.for.inc13: -; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus) +; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 24, DW_OP_minus) ; CHECK-MIR-LABEL: bb.7.for.inc16: -; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 32, DW_OP_minus) +; CHECK-MIR: DBG_VALUE $rsp, 0, ![[BDIVAR]], !DIExpression(DW_OP_constu, 24, DW_OP_minus) ; CHECK: .debug_info contents: @@ -46,20 +46,20 @@ ; CHECK: [[A]]: ; CHECK-NEXT: DW_LLE_startx_length (0x00000002, 0x0000000f): DW_OP_consts +0, DW_OP_stack_value -; CHECK-NEXT: DW_LLE_startx_length (0x00000003, 0x0000000f): DW_OP_reg0 RAX -; CHECK-NEXT: DW_LLE_startx_length (0x00000004, 0x00000012): DW_OP_breg7 RSP-8 +; CHECK-NEXT: DW_LLE_startx_length (0x00000003, 0x0000000b): DW_OP_reg0 RAX +; CHECK-NEXT: DW_LLE_startx_length (0x00000004, 0x00000012): DW_OP_breg7 RSP-4 ; CHECK-NEXT: DW_LLE_end_of_list () ; CHECK: [[E]]: -; CHECK-NEXT: DW_LLE_startx_length (0x00000005, 0x00000009): DW_OP_reg0 RAX -; CHECK-NEXT: DW_LLE_startx_length (0x00000006, 0x00000062): DW_OP_breg7 RSP-44 +; CHECK-NEXT: DW_LLE_startx_length (0x00000005, 0x0000000b): DW_OP_reg0 RAX +; CHECK-NEXT: DW_LLE_startx_length (0x00000006, 0x0000005a): DW_OP_breg7 RSP-36 ; CHECK-NEXT: DW_LLE_end_of_list () ; CHECK: [[B]]: -; CHECK-NEXT: DW_LLE_startx_length (0x00000007, 0x0000000f): DW_OP_reg0 RAX -; CHECK-NEXT: DW_LLE_startx_length (0x00000008, 0x00000042): DW_OP_breg7 RSP-32 +; CHECK-NEXT: DW_LLE_startx_length (0x00000007, 0x0000000b): DW_OP_reg0 RAX +; CHECK-NEXT: DW_LLE_startx_length (0x00000008, 0x00000042): DW_OP_breg7 RSP-24 ; CHECK-NEXT: DW_LLE_end_of_list () ; CHECK: [[D]]: -; CHECK-NEXT: DW_LLE_startx_length (0x00000009, 
0x0000000f): DW_OP_reg0 RAX -; CHECK-NEXT: DW_LLE_startx_length (0x0000000a, 0x0000002a): DW_OP_breg7 RSP-20 +; CHECK-NEXT: DW_LLE_startx_length (0x00000009, 0x0000000b): DW_OP_reg0 RAX +; CHECK-NEXT: DW_LLE_startx_length (0x0000000a, 0x0000002a): DW_OP_breg7 RSP-12 ; CHECK-NEXT: DW_LLE_end_of_list () ; Make sure we don't produce any relocations in any .dwo section (though in particular, debug_info.dwo) @@ -81,7 +81,7 @@ ; V5RNGLISTS-NOT: DW_TAG ; V5RNGLISTS: DW_AT_rnglists_base [DW_FORM_sec_offset] (0x0000000c) ; V5RNGLISTS: .debug_rnglists contents: -; V5RNGLISTS-NEXT: 0x00000000: range list header: length = 0x00000019, format = DWARF32, version = 0x0005, +; V5RNGLISTS-NEXT: 0x00000000: range list header: length = 0x00000015, format = DWARF32, version = 0x0005, ; V5RNGLISTS-SAME: addr_size = 0x08, seg_size = 0x00, offset_entry_count = 0x00000001 ; V5RNGLISTS-NEXT: offsets: [ ; V5RNGLISTS-NEXT: => 0x00000010 @@ -96,7 +96,7 @@ ; extern int c; ; static void foo (int p) ; { -; int a, b; +; int a, b; ; unsigned int d, e; ; for (a = 0; a < 30; a++) @@ -104,12 +104,12 @@ ; for (b = 0; b < 30; b++) ; for (e = 0; e < 30; e++) ; { -; int *w = &c; -; *w &= p; +; int *w = &c; +; *w &= p; ; } ; } -; void +; void ; bar () ; { ; foo (1); Index: llvm/test/DebugInfo/X86/op_deref.ll =================================================================== --- llvm/test/DebugInfo/X86/op_deref.ll +++ llvm/test/DebugInfo/X86/op_deref.ll @@ -6,10 +6,12 @@ ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF3 ; DWARF4: DW_AT_location [DW_FORM_sec_offset] (0x00000000 -; DWARF4-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref +; DWARF4-NEXT: {{.*}}: DW_OP_breg6 RBP-40, DW_OP_deref, DW_OP_deref +; DWARF4-NEXT: {{.*}}: DW_OP_breg0 RAX+0, DW_OP_deref) ; DWARF3: DW_AT_location [DW_FORM_data4] (0x00000000 -; DWARF3-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref +; DWARF3-NEXT: {{.*}}: DW_OP_breg6 RBP-40, DW_OP_deref, DW_OP_deref +; DWARF3-NEXT: {{.*}}: DW_OP_breg0 RAX+0, DW_OP_deref ; CHECK-NOT: 
DW_TAG ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla") @@ -17,8 +19,8 @@ ; Check the DEBUG_VALUE comments for good measure. ; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK ; vla should have a register-indirect address at one point. -; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rcx+0] -; ASM-CHECK: DW_OP_breg2 +; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rax+0] +; ASM-CHECK: DW_OP_breg6 ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT ; PRETTY-PRINT: DIExpression(DW_OP_deref) Index: llvm/test/DebugInfo/X86/parameters.ll =================================================================== --- llvm/test/DebugInfo/X86/parameters.ll +++ llvm/test/DebugInfo/X86/parameters.ll @@ -37,8 +37,8 @@ ; CHECK: DW_AT_location{{.*}}(DW_OP_fbreg +23) ; CHECK: DW_TAG_formal_parameter ; CHECK: DW_AT_location{{.*}}( -; CHECK-NEXT: {{.*}}: DW_OP_breg4 RSI+0, DW_OP_deref -; CHECK-NEXT: {{.*}}: DW_OP_breg7 RSP+8, DW_OP_deref, DW_OP_deref) +; CHECK-NEXT: {{.*}}: DW_OP_breg7 RSP+8, DW_OP_deref, DW_OP_deref +; CHECK-NEXT: {{.*}}: DW_OP_breg4 RSI+0, DW_OP_deref) ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name{{.*}} = "g" Index: llvm/test/DebugInfo/X86/pieces-1.ll =================================================================== --- llvm/test/DebugInfo/X86/pieces-1.ll +++ llvm/test/DebugInfo/X86/pieces-1.ll @@ -16,7 +16,7 @@ ; CHECK: .debug_loc contents: ; -; CHECK: (0x0000000000000000, 0x[[LTMP3:.*]]): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_reg4 RSI, DW_OP_piece 0x4 +; CHECK: (0x0000000000000006, 0x[[LTMP3:.*]]): DW_OP_reg5 RDI, DW_OP_piece 0x8, DW_OP_reg0 RAX, DW_OP_piece 0x4 ; 0x0000000000000006 - 0x0000000000000008: rbp-8, piece 0x8, rax, piece 0x4 ) target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" Index: llvm/test/DebugInfo/X86/prologue-stack.ll =================================================================== --- llvm/test/DebugInfo/X86/prologue-stack.ll +++ 
llvm/test/DebugInfo/X86/prologue-stack.ll @@ -6,7 +6,7 @@ ; return 0; ; } -define i32 @isel_line_test2() nounwind uwtable !dbg !5 { +define i32 @isel_line_test2(i32 %arg) nounwind uwtable !dbg !5 { ; The stack adjustment should be part of the prologue. ; CHECK: isel_line_test2: ; CHECK: {{subq|leaq}} {{.*}}, %rsp @@ -14,8 +14,9 @@ ; CHECK: movl $400, %edi ; CHECK: callq callme entry: + ; %arg should get spilled here, so we need to setup a stackframe %call = call i32 @callme(i32 400), !dbg !10 - ret i32 0, !dbg !12 + ret i32 %arg, !dbg !12 } declare i32 @callme(i32) Index: llvm/test/DebugInfo/X86/reference-argument.ll =================================================================== --- llvm/test/DebugInfo/X86/reference-argument.ll +++ llvm/test/DebugInfo/X86/reference-argument.ll @@ -13,7 +13,7 @@ ; CHECK-NOT: DW_TAG_subprogram ; CHECK: DW_TAG_formal_parameter ; CHECK-NEXT: DW_AT_location -; CHECK-NEXT: DW_OP_breg4 RSI+0 +; CHECK-NEXT: DW_OP_breg5 RDI+0 ; CHECK-NEXT: DW_AT_name {{.*}} "v" target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" Index: llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll =================================================================== --- llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll +++ llvm/test/DebugInfo/X86/spill-indirect-nrvo.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s | FileCheck %s -; RUN: llc -O0 < %s | FileCheck %s +; RUN: llc < %s | FileCheck -check-prefixes=CHECK,OPT %s +; RUN: llc -O0 < %s | FileCheck -check-prefixes=CHECK,OPTNONE %s ; Make sure we insert DW_OP_deref when spilling indirect DBG_VALUE instructions. 
@@ -21,10 +21,18 @@ ; } ; CHECK-LABEL: _Z10get_stringv: -; CHECK: #DEBUG_VALUE: get_string:result <- [$rdi+0] -; CHECK: movq %rdi, [[OFFS:[0-9]+]](%rsp) # 8-byte Spill -; CHECK: #DEBUG_VALUE: get_string:result <- [DW_OP_plus_uconst [[OFFS]], DW_OP_deref] [$rsp+0] -; CHECK: callq _ZN6stringC1Ei + +; OPT: #DEBUG_VALUE: get_string:result <- [$rdi+0] +; OPT: movq %rdi, [[OFFS:[0-9]+]](%rsp) # 8-byte Spill +; OPT: #DEBUG_VALUE: get_string:result <- [DW_OP_plus_uconst [[OFFS]], DW_OP_deref] [$rsp+0] +; OPT: callq _ZN6stringC1Ei + +; OPTNONE: #DEBUG_VALUE: get_string:result <- [DW_OP_deref] [$rsp+0] +; OPTNONE: movq %rdi, %rax +; OPTNONE: movq %rax, [[OFFS:[0-9]+]](%rsp) # 8-byte Spill +; OPTNONE: #DEBUG_VALUE: get_string:result <- [$rdi+0] +; OPTNONE: callq _ZN6stringC1Ei + ; CHECK: #APP ; CHECK: #NO_APP Index: llvm/test/DebugInfo/X86/sret.ll =================================================================== --- llvm/test/DebugInfo/X86/sret.ll +++ llvm/test/DebugInfo/X86/sret.ll @@ -3,16 +3,17 @@ ; Based on the debuginfo-tests/sret.cpp code. 
-; CHECK-DWO: DW_AT_GNU_dwo_id (0x7e62530711b94622) -; CHECK-DWO: DW_AT_GNU_dwo_id (0x7e62530711b94622) +; CHECK-DWO: DW_AT_GNU_dwo_id (0x409e35dbb641730e) +; CHECK-DWO: DW_AT_GNU_dwo_id (0x409e35dbb641730e) -; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck %s -; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck %s +; RUN: llc -O0 -fast-isel=true -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,FASTISEL %s +; RUN: llc -O0 -fast-isel=false -mtriple=x86_64-apple-darwin -filetype=obj -o - %s | llvm-dwarfdump -debug-info - | FileCheck -check-prefixes=CHECK,SDAG %s ; CHECK: _ZN1B9AInstanceEv ; CHECK: DW_TAG_variable ; CHECK-NEXT: DW_AT_location (0x00000000 -; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0 -; CHECK-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-24, DW_OP_deref) +; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg6 RBP-32, DW_OP_deref +; FASTISEL-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0) +; SDAG-NEXT: [{{.*}}, {{.*}}): DW_OP_breg5 RDI+0) ; CHECK-NEXT: DW_AT_name {{.*}}"a" %class.A = type { i32 (...)**, i32 } Index: llvm/test/DebugInfo/X86/subreg.ll =================================================================== --- llvm/test/DebugInfo/X86/subreg.ll +++ llvm/test/DebugInfo/X86/subreg.ll @@ -3,7 +3,7 @@ ; We are testing that a value in a 16 bit register gets reported as ; being in its superregister. -; CHECK: .byte 85 # super-register DW_OP_reg5 +; CHECK: .byte 80 # super-register DW_OP_reg0 ; No need to a piece at offset 0. ; CHECK-NOT: DW_OP_piece ; CHECK-NOT: DW_OP_bit_piece