Index: llvm/lib/CodeGen/RegAllocFast.cpp =================================================================== --- llvm/lib/CodeGen/RegAllocFast.cpp +++ llvm/lib/CodeGen/RegAllocFast.cpp @@ -106,13 +106,8 @@ /// that it is alive across blocks. BitVector MayLiveAcrossBlocks; - /// State of a physical register. - enum RegState { - /// A disabled register is not available for allocation, but an alias may - /// be in use. A register can only be moved out of the disabled state if - /// all aliases are disabled. - regDisabled, - + /// State of a register unit. + enum RegUnitState { /// A free register is not currently in use and can be allocated /// immediately without checking aliases. regFree, @@ -126,8 +121,8 @@ /// register. In that case, LiveVirtRegs contains the inverse mapping. }; - /// Maps each physical register to a RegState enum or a virtual register. - std::vector PhysRegState; + /// Maps each register unit to a RegUnitState enum or a virtual register. + std::vector RegUnitStates; SmallVector VirtDead; SmallVector Coalesced; @@ -138,6 +133,7 @@ RegUnitSet UsedInInstr; void setPhysRegState(MCPhysReg PhysReg, unsigned NewState); + bool isPhysRegFree(MCPhysReg PhysReg) const; /// Mark a physreg as used in this instruction. 
void markRegUsedInInstr(MCPhysReg PhysReg) { @@ -189,6 +185,7 @@ bool isLastUseOfLocalReg(const MachineOperand &MO) const; void addKillFlag(const LiveReg &LRI); + bool verifyRegStateMapping(const LiveReg &LR) const; void killVirtReg(LiveReg &LR); void killVirtReg(Register VirtReg); void spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR); @@ -196,7 +193,7 @@ void usePhysReg(MachineOperand &MO); void definePhysReg(MachineBasicBlock::iterator MI, MCPhysReg PhysReg, - RegState NewState); + unsigned NewState); unsigned calcSpillCost(MCPhysReg PhysReg) const; void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg); @@ -229,7 +226,8 @@ bool mayLiveOut(Register VirtReg); bool mayLiveIn(Register VirtReg); - void dumpState(); + void printRegUnitState(unsigned State) const; + void dumpState() const; }; } // end anonymous namespace @@ -240,7 +238,16 @@ false) void RegAllocFast::setPhysRegState(MCPhysReg PhysReg, unsigned NewState) { - PhysRegState[PhysReg] = NewState; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) + RegUnitStates[*UI] = NewState; +} + +bool RegAllocFast::isPhysRegFree(MCPhysReg PhysReg) const { + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + if (RegUnitStates[*UI] != regFree) + return false; + } + return true; } /// This allocates space for the specified virtual register to be held on the @@ -384,12 +391,21 @@ } } +bool RegAllocFast::verifyRegStateMapping(const LiveReg &LR) const { + for (MCRegUnitIterator UI(LR.PhysReg, TRI); UI.isValid(); ++UI) { + if (RegUnitStates[*UI] != LR.VirtReg) + return false; + } + + return true; +} + /// Mark virtreg as no longer available. 
void RegAllocFast::killVirtReg(LiveReg &LR) { + assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); addKillFlag(LR); - assert(PhysRegState[LR.PhysReg] == LR.VirtReg && - "Broken RegState mapping"); - setPhysRegState(LR.PhysReg, regFree); + MCPhysReg PhysReg = LR.PhysReg; + setPhysRegState(PhysReg, regFree); LR.PhysReg = 0; } @@ -416,7 +432,9 @@ /// Do the actual work of spilling. void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) { - assert(PhysRegState[LR.PhysReg] == LR.VirtReg && "Broken RegState mapping"); + assert(verifyRegStateMapping(LR) && "Broken RegState mapping"); + + MCPhysReg PhysReg = LR.PhysReg; if (LR.Dirty) { // If this physreg is used by the instruction, we want to kill it on the @@ -424,7 +442,7 @@ bool SpillKill = MachineBasicBlock::iterator(LR.LastUse) != MI; LR.Dirty = false; - spill(MI, LR.VirtReg, LR.PhysReg, SpillKill); + spill(MI, LR.VirtReg, PhysReg, SpillKill); if (SpillKill) LR.LastUse = nullptr; // Don't kill register again @@ -460,53 +478,16 @@ assert(PhysReg.isPhysical() && "Bad usePhysReg operand"); markRegUsedInInstr(PhysReg); - switch (PhysRegState[PhysReg]) { - case regDisabled: - break; - case regReserved: - PhysRegState[PhysReg] = regFree; - LLVM_FALLTHROUGH; - case regFree: - MO.setIsKill(); - return; - default: - // The physreg was allocated to a virtual register. That means the value we - // wanted has been clobbered. - llvm_unreachable("Instruction uses an allocated register"); - } - // Maybe a superregister is reserved? 
- for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (PhysRegState[Alias]) { - case regDisabled: - break; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + switch (RegUnitStates[*UI]) { case regReserved: - // Either PhysReg is a subregister of Alias and we mark the - // whole register as free, or PhysReg is the superregister of - // Alias and we mark all the aliases as disabled before freeing - // PhysReg. - // In the latter case, since PhysReg was disabled, this means that - // its value is defined only by physical sub-registers. This check - // is performed by the assert of the default case in this loop. - // Note: The value of the superregister may only be partial - // defined, that is why regDisabled is a valid state for aliases. - assert((TRI->isSuperRegister(PhysReg, Alias) || - TRI->isSuperRegister(Alias, PhysReg)) && - "Instruction is not using a subregister of a reserved register"); + RegUnitStates[*UI] = regFree; LLVM_FALLTHROUGH; case regFree: - if (TRI->isSuperRegister(PhysReg, Alias)) { - // Leave the superregister in the working set. - setPhysRegState(Alias, regFree); - MO.getParent()->addRegisterKilled(Alias, TRI, true); - return; - } - // Some other alias was in the working set - clear it. - setPhysRegState(Alias, regDisabled); break; default: - llvm_unreachable("Instruction uses an alias of an allocated register"); + llvm_unreachable("Unexpected reg unit state"); } } @@ -519,38 +500,20 @@ /// similar to defineVirtReg except the physreg is reserved instead of /// allocated. 
void RegAllocFast::definePhysReg(MachineBasicBlock::iterator MI, - MCPhysReg PhysReg, RegState NewState) { - markRegUsedInInstr(PhysReg); - switch (Register VirtReg = PhysRegState[PhysReg]) { - case regDisabled: - break; - default: - spillVirtReg(MI, VirtReg); - LLVM_FALLTHROUGH; - case regFree: - case regReserved: - setPhysRegState(PhysReg, NewState); - return; - } - - // This is a disabled register, disable all aliases. - setPhysRegState(PhysReg, NewState); - for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (Register VirtReg = PhysRegState[Alias]) { - case regDisabled: - break; + MCPhysReg PhysReg, unsigned NewState) { + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + switch (unsigned VirtReg = RegUnitStates[*UI]) { default: spillVirtReg(MI, VirtReg); - LLVM_FALLTHROUGH; + break; case regFree: case regReserved: - setPhysRegState(Alias, regDisabled); - if (TRI->isSuperRegister(PhysReg, Alias)) - return; break; } } + + markRegUsedInInstr(PhysReg); + setPhysRegState(PhysReg, NewState); } /// Return the cost of spilling clearing out PhysReg and aliases so it is free @@ -563,46 +526,24 @@ << " is already used in instr.\n"); return spillImpossible; } - switch (Register VirtReg = PhysRegState[PhysReg]) { - case regDisabled: - break; - case regFree: - return 0; - case regReserved: - LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " - << printReg(PhysReg, TRI) << " is reserved already.\n"); - return spillImpossible; - default: { - LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Missing VirtReg entry"); - return LRI->Dirty ? spillDirty : spillClean; - } - } - // This is a disabled register, add up cost of aliases. 
- LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is disabled.\n"); - unsigned Cost = 0; - for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { - MCPhysReg Alias = *AI; - switch (Register VirtReg = PhysRegState[Alias]) { - case regDisabled: - break; + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + switch (unsigned VirtReg = RegUnitStates[*UI]) { case regFree: - ++Cost; break; case regReserved: + LLVM_DEBUG(dbgs() << printReg(VirtReg, TRI) << " corresponding " + << printReg(PhysReg, TRI) << " is reserved already.\n"); return spillImpossible; default: { LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg); assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && "Missing VirtReg entry"); - Cost += LRI->Dirty ? spillDirty : spillClean; - break; + return LRI->Dirty ? spillDirty : spillClean; } } } - return Cost; + return 0; } /// This method updates local state so that we know that PhysReg is the @@ -909,9 +850,17 @@ if (!Reg || !Reg.isPhysical()) continue; markRegUsedInInstr(Reg); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - if (ThroughRegs.count(PhysRegState[*AI])) - definePhysReg(MI, *AI, regFree); + + for (MCRegUnitIterator UI(Reg, TRI); UI.isValid(); ++UI) { + if (!ThroughRegs.count(RegUnitStates[*UI])) + continue; + + // Need to spill any aliasing registers. 
+ for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { + for (MCSuperRegIterator SI(*RI, TRI, true); SI.isValid(); ++SI) { + definePhysReg(MI, *SI, regFree); + } + } } } @@ -975,37 +924,40 @@ } #ifndef NDEBUG -void RegAllocFast::dumpState() { - for (unsigned Reg = 1, E = TRI->getNumRegs(); Reg != E; ++Reg) { - if (PhysRegState[Reg] == regDisabled) continue; - dbgs() << " " << printReg(Reg, TRI); - switch(PhysRegState[Reg]) { + +void RegAllocFast::dumpState() const { + // Register units are numbered from 0 (physreg 0 is invalid, unit 0 is not). + for (unsigned Unit = 0, E = TRI->getNumRegUnits(); Unit != E; ++Unit) { + switch (unsigned VirtReg = RegUnitStates[Unit]) { case regFree: break; case regReserved: - dbgs() << "*"; + dbgs() << " " << printRegUnit(Unit, TRI) << "[P]"; break; default: { - dbgs() << '=' << printReg(PhysRegState[Reg]); - LiveRegMap::iterator LRI = findLiveVirtReg(PhysRegState[Reg]); - assert(LRI != LiveVirtRegs.end() && LRI->PhysReg && - "Missing VirtReg entry"); - if (LRI->Dirty) - dbgs() << "*"; - assert(LRI->PhysReg == Reg && "Bad inverse map"); + dbgs() << ' ' << printRegUnit(Unit, TRI) << '=' << printReg(VirtReg); + LiveRegMap::const_iterator I = findLiveVirtReg(VirtReg); + assert(I != LiveVirtRegs.end() && "have LiveVirtRegs entry"); + if (I->Dirty) + dbgs() << "[D]"; + assert(TRI->hasRegUnit(I->PhysReg, Unit) && "inverse mapping present"); break; } } } dbgs() << '\n'; // Check that LiveVirtRegs is the inverse. 
- for (LiveRegMap::iterator i = LiveVirtRegs.begin(), - e = LiveVirtRegs.end(); i != e; ++i) { - if (!i->PhysReg) - continue; - assert(i->VirtReg.isVirtual() && "Bad map key"); - assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value"); - assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map"); + for (const LiveReg &LR : LiveVirtRegs) { + Register VirtReg = LR.VirtReg; + assert(VirtReg.isVirtual() && "Bad map key"); + MCPhysReg PhysReg = LR.PhysReg; + if (PhysReg != 0) { + assert(Register::isPhysicalRegister(PhysReg) && + "mapped to physreg"); + for (MCRegUnitIterator UI(PhysReg, TRI); UI.isValid(); ++UI) { + assert(RegUnitStates[*UI] == VirtReg && "inverse map valid"); + } + } } } #endif @@ -1247,7 +1199,7 @@ this->MBB = &MBB; LLVM_DEBUG(dbgs() << "\nAllocating " << MBB); - PhysRegState.assign(TRI->getNumRegs(), regDisabled); + RegUnitStates.assign(TRI->getNumRegUnits(), regFree); assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?"); MachineBasicBlock::iterator MII = MBB.begin(); Index: llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll +++ llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion-fallback.ll @@ -4,8 +4,8 @@ define i32 @fptosi_wh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptosi_wh -; CHECK: fcvt s1, h0 -; CHECK: fcvtzs [[REG:w[0-9]+]], s1 +; CHECK: fcvt s0, h0 +; CHECK: fcvtzs [[REG:w[0-9]+]], s0 ; CHECK: mov w0, [[REG]] %conv = fptosi half %a to i32 ret i32 %conv @@ -15,8 +15,8 @@ define i32 @fptoui_swh(half %a) nounwind ssp { entry: ; CHECK-LABEL: fptoui_swh -; CHECK: fcvt s1, h0 -; CHECK: fcvtzu [[REG:w[0-9]+]], s1 +; CHECK: fcvt s0, h0 +; CHECK: fcvtzu [[REG:w[0-9]+]], s0 ; CHECK: mov w0, [[REG]] %conv = fptoui half %a to i32 ret i32 %conv Index: llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll 
=================================================================== --- llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll +++ llvm/test/CodeGen/AArch64/arm64-fast-isel-conversion.ll @@ -54,8 +54,8 @@ ; CHECK: ldrh w8, [sp, #12] ; CHECK: str w8, [sp, #8] ; CHECK: ldr w8, [sp, #8] -; CHECK: mov x9, x8 -; CHECK: str x9, [sp] +; CHECK: ; kill: def $x8 killed $w8 +; CHECK: str x8, [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 @@ -109,8 +109,8 @@ ; CHECK: strh w8, [sp, #12] ; CHECK: ldrsh w8, [sp, #12] ; CHECK: str w8, [sp, #8] -; CHECK: ldrsw x9, [sp, #8] -; CHECK: str x9, [sp] +; CHECK: ldrsw x8, [sp, #8] +; CHECK: str x8, [sp] ; CHECK: ldr x0, [sp] ; CHECK: ret %a.addr = alloca i8, align 1 Index: llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll +++ llvm/test/CodeGen/AArch64/arm64-vcvt_f.ll @@ -285,11 +285,11 @@ ; FAST: // %bb.0: ; FAST-NEXT: sub sp, sp, #16 // =16 ; FAST-NEXT: .cfi_def_cfa_offset 16 -; FAST-NEXT: fcvt h1, s0 +; FAST-NEXT: fcvt h0, s0 ; FAST-NEXT: // implicit-def: $w0 -; FAST-NEXT: fmov s0, w0 -; FAST-NEXT: mov.16b v0, v1 -; FAST-NEXT: fmov w8, s0 +; FAST-NEXT: fmov s1, w0 +; FAST-NEXT: mov.16b v1, v0 +; FAST-NEXT: fmov w8, s1 ; FAST-NEXT: mov w0, w8 ; FAST-NEXT: str w0, [sp, #12] // 4-byte Folded Spill ; FAST-NEXT: mov w0, w8 Index: llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll =================================================================== --- llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll +++ llvm/test/CodeGen/AArch64/fast-isel-sp-adjust.ll @@ -15,8 +15,7 @@ ; CHECK-LABEL: foo: ; CHECK: sub ; CHECK-DAG: mov x[[SP:[0-9]+]], sp -; CHECK-DAG: mov [[TMP:w[0-9]+]], #4104 -; CHECK: mov w[[OFFSET:[0-9]+]], [[TMP]] +; CHECK-DAG: mov w[[OFFSET:[0-9]+]], #4104 ; CHECK: strb w0, [x[[SP]], x[[OFFSET]]] define void @foo(i8 %in) { Index: llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll 
=================================================================== --- llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll +++ llvm/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -69,15 +69,15 @@ ; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14 ; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15 ; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16 - ; GCN: renamable $sgpr20_sgpr21 = S_MOV_B64 $exec + ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec ; GCN: renamable $vgpr1 = IMPLICIT_DEF - ; GCN: renamable $sgpr22_sgpr23 = IMPLICIT_DEF + ; GCN: renamable $sgpr2_sgpr3 = IMPLICIT_DEF ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) ; GCN: SI_SPILL_S128_SAVE killed $sgpr4_sgpr5_sgpr6_sgpr7, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 16 into %stack.1, align 4, addrspace 5) ; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr20_sgpr21, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.3, align 4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) - ; GCN: SI_SPILL_S64_SAVE killed $sgpr22_sgpr23, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit 
$sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) ; GCN: bb.1: ; GCN: successors: %bb.1(0x40000000), %bb.3(0x40000000) ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (load 8 from %stack.5, align 4, addrspace 5) @@ -91,8 +91,8 @@ ; GCN: renamable $vgpr18 = V_MOV_B32_e32 undef $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0 ; GCN: S_SET_GPR_IDX_OFF implicit-def $mode, implicit $mode ; GCN: renamable $vgpr19 = COPY renamable $vgpr18 - ; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5 - ; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: renamable $sgpr2_sgpr3 = COPY renamable $sgpr4_sgpr5 + ; GCN: SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.5, align 4, addrspace 5) ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store 8 into %stack.6, align 4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr32, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) Index: llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll +++ llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll @@ -11,7 +11,7 @@ define amdgpu_kernel void @spill_sgprs_to_multiple_vgprs(i32 addrspace(1)* %out, i32 %in) #0 { ; 
GCN-LABEL: spill_sgprs_to_multiple_vgprs: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s2, s[0:1], 0xb +; GCN-NEXT: s_load_dword s0, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:11] ; GCN-NEXT: ;;#ASMEND @@ -42,352 +42,354 @@ ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[84:91] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 0 -; GCN-NEXT: v_writelane_b32 v0, s5, 1 -; GCN-NEXT: v_writelane_b32 v0, s6, 2 -; GCN-NEXT: v_writelane_b32 v0, s7, 3 -; GCN-NEXT: v_writelane_b32 v0, s8, 4 -; GCN-NEXT: v_writelane_b32 v0, s9, 5 -; GCN-NEXT: v_writelane_b32 v0, s10, 6 -; GCN-NEXT: v_writelane_b32 v0, s11, 7 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 8 -; GCN-NEXT: v_writelane_b32 v0, s5, 9 -; GCN-NEXT: v_writelane_b32 v0, s6, 10 -; GCN-NEXT: v_writelane_b32 v0, s7, 11 -; GCN-NEXT: v_writelane_b32 v0, s8, 12 -; GCN-NEXT: v_writelane_b32 v0, s9, 13 -; GCN-NEXT: v_writelane_b32 v0, s10, 14 -; GCN-NEXT: v_writelane_b32 v0, s11, 15 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 16 -; GCN-NEXT: v_writelane_b32 v0, s5, 17 -; GCN-NEXT: v_writelane_b32 v0, s6, 18 -; GCN-NEXT: v_writelane_b32 v0, s7, 19 -; GCN-NEXT: v_writelane_b32 v0, s8, 20 -; GCN-NEXT: v_writelane_b32 v0, s9, 21 -; GCN-NEXT: v_writelane_b32 v0, s10, 22 -; GCN-NEXT: v_writelane_b32 v0, s11, 23 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 24 -; GCN-NEXT: v_writelane_b32 v0, s5, 25 -; GCN-NEXT: v_writelane_b32 v0, s6, 26 -; GCN-NEXT: v_writelane_b32 v0, s7, 27 -; GCN-NEXT: v_writelane_b32 v0, s8, 28 -; GCN-NEXT: v_writelane_b32 v0, s9, 29 -; GCN-NEXT: v_writelane_b32 v0, s10, 30 -; GCN-NEXT: v_writelane_b32 v0, s11, 31 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 32 -; GCN-NEXT: v_writelane_b32 v0, s5, 33 -; GCN-NEXT: v_writelane_b32 v0, 
s6, 34 -; GCN-NEXT: v_writelane_b32 v0, s7, 35 -; GCN-NEXT: v_writelane_b32 v0, s8, 36 -; GCN-NEXT: v_writelane_b32 v0, s9, 37 -; GCN-NEXT: v_writelane_b32 v0, s10, 38 -; GCN-NEXT: v_writelane_b32 v0, s11, 39 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 40 -; GCN-NEXT: v_writelane_b32 v0, s5, 41 -; GCN-NEXT: v_writelane_b32 v0, s6, 42 -; GCN-NEXT: v_writelane_b32 v0, s7, 43 -; GCN-NEXT: v_writelane_b32 v0, s8, 44 -; GCN-NEXT: v_writelane_b32 v0, s9, 45 -; GCN-NEXT: v_writelane_b32 v0, s10, 46 -; GCN-NEXT: v_writelane_b32 v0, s11, 47 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 48 -; GCN-NEXT: v_writelane_b32 v0, s5, 49 -; GCN-NEXT: v_writelane_b32 v0, s6, 50 -; GCN-NEXT: v_writelane_b32 v0, s7, 51 -; GCN-NEXT: v_writelane_b32 v0, s8, 52 -; GCN-NEXT: v_writelane_b32 v0, s9, 53 -; GCN-NEXT: v_writelane_b32 v0, s10, 54 -; GCN-NEXT: v_writelane_b32 v0, s11, 55 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:11] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_cmp_lg_u32 s2, s3 -; GCN-NEXT: v_writelane_b32 v0, s12, 56 -; GCN-NEXT: v_writelane_b32 v0, s13, 57 -; GCN-NEXT: v_writelane_b32 v0, s14, 58 -; GCN-NEXT: v_writelane_b32 v0, s15, 59 -; GCN-NEXT: v_writelane_b32 v0, s16, 60 -; GCN-NEXT: v_writelane_b32 v0, s17, 61 -; GCN-NEXT: v_writelane_b32 v0, s18, 62 -; GCN-NEXT: v_writelane_b32 v0, s19, 63 -; GCN-NEXT: v_writelane_b32 v1, s20, 0 -; GCN-NEXT: v_writelane_b32 v1, s21, 1 -; GCN-NEXT: v_writelane_b32 v1, s22, 2 -; GCN-NEXT: v_writelane_b32 v1, s23, 3 -; GCN-NEXT: v_writelane_b32 v1, s24, 4 -; GCN-NEXT: v_writelane_b32 v1, s25, 5 -; GCN-NEXT: v_writelane_b32 v1, s26, 6 -; GCN-NEXT: v_writelane_b32 v1, s27, 7 -; GCN-NEXT: v_writelane_b32 v1, s36, 8 -; GCN-NEXT: v_writelane_b32 v1, s37, 9 -; GCN-NEXT: v_writelane_b32 v1, s38, 10 -; GCN-NEXT: v_writelane_b32 v1, s39, 11 -; GCN-NEXT: 
v_writelane_b32 v1, s40, 12 -; GCN-NEXT: v_writelane_b32 v1, s41, 13 -; GCN-NEXT: v_writelane_b32 v1, s42, 14 -; GCN-NEXT: v_writelane_b32 v1, s43, 15 -; GCN-NEXT: v_writelane_b32 v1, s44, 16 -; GCN-NEXT: v_writelane_b32 v1, s45, 17 -; GCN-NEXT: v_writelane_b32 v1, s46, 18 -; GCN-NEXT: v_writelane_b32 v1, s47, 19 -; GCN-NEXT: v_writelane_b32 v1, s48, 20 -; GCN-NEXT: v_writelane_b32 v1, s49, 21 -; GCN-NEXT: v_writelane_b32 v1, s50, 22 -; GCN-NEXT: v_writelane_b32 v1, s51, 23 -; GCN-NEXT: v_writelane_b32 v1, s52, 24 -; GCN-NEXT: v_writelane_b32 v1, s53, 25 -; GCN-NEXT: v_writelane_b32 v1, s54, 26 -; GCN-NEXT: v_writelane_b32 v1, s55, 27 -; GCN-NEXT: v_writelane_b32 v1, s56, 28 -; GCN-NEXT: v_writelane_b32 v1, s57, 29 -; GCN-NEXT: v_writelane_b32 v1, s58, 30 -; GCN-NEXT: v_writelane_b32 v1, s59, 31 -; GCN-NEXT: v_writelane_b32 v1, s60, 32 -; GCN-NEXT: v_writelane_b32 v1, s61, 33 -; GCN-NEXT: v_writelane_b32 v1, s62, 34 -; GCN-NEXT: v_writelane_b32 v1, s63, 35 -; GCN-NEXT: v_writelane_b32 v1, s64, 36 -; GCN-NEXT: v_writelane_b32 v1, s65, 37 -; GCN-NEXT: v_writelane_b32 v1, s66, 38 -; GCN-NEXT: v_writelane_b32 v1, s67, 39 -; GCN-NEXT: v_writelane_b32 v1, s68, 40 -; GCN-NEXT: v_writelane_b32 v1, s69, 41 -; GCN-NEXT: v_writelane_b32 v1, s70, 42 -; GCN-NEXT: v_writelane_b32 v1, s71, 43 -; GCN-NEXT: v_writelane_b32 v1, s72, 44 -; GCN-NEXT: v_writelane_b32 v1, s73, 45 -; GCN-NEXT: v_writelane_b32 v1, s74, 46 -; GCN-NEXT: v_writelane_b32 v1, s75, 47 -; GCN-NEXT: v_writelane_b32 v1, s76, 48 -; GCN-NEXT: v_writelane_b32 v1, s77, 49 -; GCN-NEXT: v_writelane_b32 v1, s78, 50 -; GCN-NEXT: v_writelane_b32 v1, s79, 51 -; GCN-NEXT: v_writelane_b32 v1, s80, 52 -; GCN-NEXT: v_writelane_b32 v1, s81, 53 -; GCN-NEXT: v_writelane_b32 v1, s82, 54 -; GCN-NEXT: v_writelane_b32 v1, s83, 55 -; GCN-NEXT: v_writelane_b32 v1, s84, 56 -; GCN-NEXT: v_writelane_b32 v1, s85, 57 -; GCN-NEXT: v_writelane_b32 v1, s86, 58 -; GCN-NEXT: v_writelane_b32 v1, s87, 59 -; GCN-NEXT: v_writelane_b32 v1, s88, 60 -; 
GCN-NEXT: v_writelane_b32 v1, s89, 61 -; GCN-NEXT: v_writelane_b32 v1, s90, 62 -; GCN-NEXT: v_writelane_b32 v1, s91, 63 -; GCN-NEXT: v_writelane_b32 v2, s4, 0 -; GCN-NEXT: v_writelane_b32 v2, s5, 1 -; GCN-NEXT: v_writelane_b32 v2, s6, 2 -; GCN-NEXT: v_writelane_b32 v2, s7, 3 -; GCN-NEXT: v_writelane_b32 v2, s8, 4 -; GCN-NEXT: v_writelane_b32 v2, s9, 5 -; GCN-NEXT: v_writelane_b32 v2, s10, 6 -; GCN-NEXT: v_writelane_b32 v2, s11, 7 +; GCN-NEXT: v_writelane_b32 v0, s0, 0 +; GCN-NEXT: v_writelane_b32 v0, s4, 1 +; GCN-NEXT: v_writelane_b32 v0, s5, 2 +; GCN-NEXT: v_writelane_b32 v0, s6, 3 +; GCN-NEXT: v_writelane_b32 v0, s7, 4 +; GCN-NEXT: v_writelane_b32 v0, s8, 5 +; GCN-NEXT: v_writelane_b32 v0, s9, 6 +; GCN-NEXT: v_writelane_b32 v0, s10, 7 +; GCN-NEXT: v_writelane_b32 v0, s11, 8 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 9 +; GCN-NEXT: v_writelane_b32 v0, s1, 10 +; GCN-NEXT: v_writelane_b32 v0, s2, 11 +; GCN-NEXT: v_writelane_b32 v0, s3, 12 +; GCN-NEXT: v_writelane_b32 v0, s4, 13 +; GCN-NEXT: v_writelane_b32 v0, s5, 14 +; GCN-NEXT: v_writelane_b32 v0, s6, 15 +; GCN-NEXT: v_writelane_b32 v0, s7, 16 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 17 +; GCN-NEXT: v_writelane_b32 v0, s1, 18 +; GCN-NEXT: v_writelane_b32 v0, s2, 19 +; GCN-NEXT: v_writelane_b32 v0, s3, 20 +; GCN-NEXT: v_writelane_b32 v0, s4, 21 +; GCN-NEXT: v_writelane_b32 v0, s5, 22 +; GCN-NEXT: v_writelane_b32 v0, s6, 23 +; GCN-NEXT: v_writelane_b32 v0, s7, 24 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 25 +; GCN-NEXT: v_writelane_b32 v0, s1, 26 +; GCN-NEXT: v_writelane_b32 v0, s2, 27 +; GCN-NEXT: v_writelane_b32 v0, s3, 28 +; GCN-NEXT: v_writelane_b32 v0, s4, 29 +; GCN-NEXT: v_writelane_b32 v0, s5, 30 +; GCN-NEXT: v_writelane_b32 v0, s6, 31 +; GCN-NEXT: v_writelane_b32 v0, s7, 32 +; GCN-NEXT: ;;#ASMSTART +; 
GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 33 +; GCN-NEXT: v_writelane_b32 v0, s1, 34 +; GCN-NEXT: v_writelane_b32 v0, s2, 35 +; GCN-NEXT: v_writelane_b32 v0, s3, 36 +; GCN-NEXT: v_writelane_b32 v0, s4, 37 +; GCN-NEXT: v_writelane_b32 v0, s5, 38 +; GCN-NEXT: v_writelane_b32 v0, s6, 39 +; GCN-NEXT: v_writelane_b32 v0, s7, 40 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 41 +; GCN-NEXT: v_writelane_b32 v0, s1, 42 +; GCN-NEXT: v_writelane_b32 v0, s2, 43 +; GCN-NEXT: v_writelane_b32 v0, s3, 44 +; GCN-NEXT: v_writelane_b32 v0, s4, 45 +; GCN-NEXT: v_writelane_b32 v0, s5, 46 +; GCN-NEXT: v_writelane_b32 v0, s6, 47 +; GCN-NEXT: v_writelane_b32 v0, s7, 48 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 49 +; GCN-NEXT: v_writelane_b32 v0, s1, 50 +; GCN-NEXT: v_writelane_b32 v0, s2, 51 +; GCN-NEXT: v_writelane_b32 v0, s3, 52 +; GCN-NEXT: v_writelane_b32 v0, s4, 53 +; GCN-NEXT: v_writelane_b32 v0, s5, 54 +; GCN-NEXT: v_writelane_b32 v0, s6, 55 +; GCN-NEXT: v_writelane_b32 v0, s7, 56 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s8, 0 +; GCN-NEXT: v_readlane_b32 s9, v0, 0 +; GCN-NEXT: s_cmp_lg_u32 s9, s8 +; GCN-NEXT: v_writelane_b32 v0, s12, 57 +; GCN-NEXT: v_writelane_b32 v0, s13, 58 +; GCN-NEXT: v_writelane_b32 v0, s14, 59 +; GCN-NEXT: v_writelane_b32 v0, s15, 60 +; GCN-NEXT: v_writelane_b32 v0, s16, 61 +; GCN-NEXT: v_writelane_b32 v0, s17, 62 +; GCN-NEXT: v_writelane_b32 v0, s18, 63 +; GCN-NEXT: v_writelane_b32 v1, s19, 0 +; GCN-NEXT: v_writelane_b32 v1, s20, 1 +; GCN-NEXT: v_writelane_b32 v1, s21, 2 +; GCN-NEXT: v_writelane_b32 v1, s22, 3 +; GCN-NEXT: v_writelane_b32 v1, s23, 4 +; GCN-NEXT: v_writelane_b32 v1, s24, 5 +; GCN-NEXT: v_writelane_b32 v1, s25, 6 +; GCN-NEXT: v_writelane_b32 v1, s26, 7 +; GCN-NEXT: v_writelane_b32 v1, s27, 8 +; GCN-NEXT: 
v_writelane_b32 v1, s36, 9 +; GCN-NEXT: v_writelane_b32 v1, s37, 10 +; GCN-NEXT: v_writelane_b32 v1, s38, 11 +; GCN-NEXT: v_writelane_b32 v1, s39, 12 +; GCN-NEXT: v_writelane_b32 v1, s40, 13 +; GCN-NEXT: v_writelane_b32 v1, s41, 14 +; GCN-NEXT: v_writelane_b32 v1, s42, 15 +; GCN-NEXT: v_writelane_b32 v1, s43, 16 +; GCN-NEXT: v_writelane_b32 v1, s44, 17 +; GCN-NEXT: v_writelane_b32 v1, s45, 18 +; GCN-NEXT: v_writelane_b32 v1, s46, 19 +; GCN-NEXT: v_writelane_b32 v1, s47, 20 +; GCN-NEXT: v_writelane_b32 v1, s48, 21 +; GCN-NEXT: v_writelane_b32 v1, s49, 22 +; GCN-NEXT: v_writelane_b32 v1, s50, 23 +; GCN-NEXT: v_writelane_b32 v1, s51, 24 +; GCN-NEXT: v_writelane_b32 v1, s52, 25 +; GCN-NEXT: v_writelane_b32 v1, s53, 26 +; GCN-NEXT: v_writelane_b32 v1, s54, 27 +; GCN-NEXT: v_writelane_b32 v1, s55, 28 +; GCN-NEXT: v_writelane_b32 v1, s56, 29 +; GCN-NEXT: v_writelane_b32 v1, s57, 30 +; GCN-NEXT: v_writelane_b32 v1, s58, 31 +; GCN-NEXT: v_writelane_b32 v1, s59, 32 +; GCN-NEXT: v_writelane_b32 v1, s60, 33 +; GCN-NEXT: v_writelane_b32 v1, s61, 34 +; GCN-NEXT: v_writelane_b32 v1, s62, 35 +; GCN-NEXT: v_writelane_b32 v1, s63, 36 +; GCN-NEXT: v_writelane_b32 v1, s64, 37 +; GCN-NEXT: v_writelane_b32 v1, s65, 38 +; GCN-NEXT: v_writelane_b32 v1, s66, 39 +; GCN-NEXT: v_writelane_b32 v1, s67, 40 +; GCN-NEXT: v_writelane_b32 v1, s68, 41 +; GCN-NEXT: v_writelane_b32 v1, s69, 42 +; GCN-NEXT: v_writelane_b32 v1, s70, 43 +; GCN-NEXT: v_writelane_b32 v1, s71, 44 +; GCN-NEXT: v_writelane_b32 v1, s72, 45 +; GCN-NEXT: v_writelane_b32 v1, s73, 46 +; GCN-NEXT: v_writelane_b32 v1, s74, 47 +; GCN-NEXT: v_writelane_b32 v1, s75, 48 +; GCN-NEXT: v_writelane_b32 v1, s76, 49 +; GCN-NEXT: v_writelane_b32 v1, s77, 50 +; GCN-NEXT: v_writelane_b32 v1, s78, 51 +; GCN-NEXT: v_writelane_b32 v1, s79, 52 +; GCN-NEXT: v_writelane_b32 v1, s80, 53 +; GCN-NEXT: v_writelane_b32 v1, s81, 54 +; GCN-NEXT: v_writelane_b32 v1, s82, 55 +; GCN-NEXT: v_writelane_b32 v1, s83, 56 +; GCN-NEXT: v_writelane_b32 v1, s84, 57 +; 
GCN-NEXT: v_writelane_b32 v1, s85, 58 +; GCN-NEXT: v_writelane_b32 v1, s86, 59 +; GCN-NEXT: v_writelane_b32 v1, s87, 60 +; GCN-NEXT: v_writelane_b32 v1, s88, 61 +; GCN-NEXT: v_writelane_b32 v1, s89, 62 +; GCN-NEXT: v_writelane_b32 v1, s90, 63 +; GCN-NEXT: v_writelane_b32 v2, s91, 0 +; GCN-NEXT: v_writelane_b32 v2, s0, 1 +; GCN-NEXT: v_writelane_b32 v2, s1, 2 +; GCN-NEXT: v_writelane_b32 v2, s2, 3 +; GCN-NEXT: v_writelane_b32 v2, s3, 4 +; GCN-NEXT: v_writelane_b32 v2, s4, 5 +; GCN-NEXT: v_writelane_b32 v2, s5, 6 +; GCN-NEXT: v_writelane_b32 v2, s6, 7 +; GCN-NEXT: v_writelane_b32 v2, s7, 8 ; GCN-NEXT: s_cbranch_scc1 BB0_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_readlane_b32 s2, v0, 2 -; GCN-NEXT: v_readlane_b32 s3, v0, 3 -; GCN-NEXT: v_readlane_b32 s4, v0, 4 -; GCN-NEXT: v_readlane_b32 s5, v0, 5 -; GCN-NEXT: v_readlane_b32 s6, v0, 6 -; GCN-NEXT: v_readlane_b32 s7, v0, 7 +; GCN-NEXT: v_readlane_b32 s0, v0, 1 +; GCN-NEXT: v_readlane_b32 s1, v0, 2 +; GCN-NEXT: v_readlane_b32 s2, v0, 3 +; GCN-NEXT: v_readlane_b32 s3, v0, 4 +; GCN-NEXT: v_readlane_b32 s4, v0, 5 +; GCN-NEXT: v_readlane_b32 s5, v0, 6 +; GCN-NEXT: v_readlane_b32 s6, v0, 7 +; GCN-NEXT: v_readlane_b32 s7, v0, 8 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 56 -; GCN-NEXT: v_readlane_b32 s1, v0, 57 -; GCN-NEXT: v_readlane_b32 s2, v0, 58 -; GCN-NEXT: v_readlane_b32 s3, v0, 59 -; GCN-NEXT: v_readlane_b32 s4, v0, 60 -; GCN-NEXT: v_readlane_b32 s5, v0, 61 -; GCN-NEXT: v_readlane_b32 s6, v0, 62 -; GCN-NEXT: v_readlane_b32 s7, v0, 63 +; GCN-NEXT: v_readlane_b32 s0, v0, 57 +; GCN-NEXT: v_readlane_b32 s1, v0, 58 +; GCN-NEXT: v_readlane_b32 s2, v0, 59 +; GCN-NEXT: v_readlane_b32 s3, v0, 60 +; GCN-NEXT: v_readlane_b32 s4, v0, 61 +; GCN-NEXT: v_readlane_b32 s5, v0, 62 +; GCN-NEXT: v_readlane_b32 s6, v0, 63 +; GCN-NEXT: v_readlane_b32 s7, v1, 0 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use 
s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 0 -; GCN-NEXT: v_readlane_b32 s1, v1, 1 -; GCN-NEXT: v_readlane_b32 s2, v1, 2 -; GCN-NEXT: v_readlane_b32 s3, v1, 3 -; GCN-NEXT: v_readlane_b32 s4, v1, 4 -; GCN-NEXT: v_readlane_b32 s5, v1, 5 -; GCN-NEXT: v_readlane_b32 s6, v1, 6 -; GCN-NEXT: v_readlane_b32 s7, v1, 7 +; GCN-NEXT: v_readlane_b32 s0, v1, 1 +; GCN-NEXT: v_readlane_b32 s1, v1, 2 +; GCN-NEXT: v_readlane_b32 s2, v1, 3 +; GCN-NEXT: v_readlane_b32 s3, v1, 4 +; GCN-NEXT: v_readlane_b32 s4, v1, 5 +; GCN-NEXT: v_readlane_b32 s5, v1, 6 +; GCN-NEXT: v_readlane_b32 s6, v1, 7 +; GCN-NEXT: v_readlane_b32 s7, v1, 8 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 8 -; GCN-NEXT: v_readlane_b32 s1, v1, 9 -; GCN-NEXT: v_readlane_b32 s2, v1, 10 -; GCN-NEXT: v_readlane_b32 s3, v1, 11 -; GCN-NEXT: v_readlane_b32 s4, v1, 12 -; GCN-NEXT: v_readlane_b32 s5, v1, 13 -; GCN-NEXT: v_readlane_b32 s6, v1, 14 -; GCN-NEXT: v_readlane_b32 s7, v1, 15 +; GCN-NEXT: v_readlane_b32 s0, v1, 9 +; GCN-NEXT: v_readlane_b32 s1, v1, 10 +; GCN-NEXT: v_readlane_b32 s2, v1, 11 +; GCN-NEXT: v_readlane_b32 s3, v1, 12 +; GCN-NEXT: v_readlane_b32 s4, v1, 13 +; GCN-NEXT: v_readlane_b32 s5, v1, 14 +; GCN-NEXT: v_readlane_b32 s6, v1, 15 +; GCN-NEXT: v_readlane_b32 s7, v1, 16 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 16 -; GCN-NEXT: v_readlane_b32 s1, v1, 17 -; GCN-NEXT: v_readlane_b32 s2, v1, 18 -; GCN-NEXT: v_readlane_b32 s3, v1, 19 -; GCN-NEXT: v_readlane_b32 s4, v1, 20 -; GCN-NEXT: v_readlane_b32 s5, v1, 21 -; GCN-NEXT: v_readlane_b32 s6, v1, 22 -; GCN-NEXT: v_readlane_b32 s7, v1, 23 +; GCN-NEXT: v_readlane_b32 s0, v1, 17 +; GCN-NEXT: v_readlane_b32 s1, v1, 18 +; GCN-NEXT: v_readlane_b32 s2, v1, 19 +; GCN-NEXT: v_readlane_b32 s3, v1, 20 +; GCN-NEXT: v_readlane_b32 s4, v1, 21 +; GCN-NEXT: v_readlane_b32 s5, v1, 22 +; GCN-NEXT: v_readlane_b32 s6, v1, 23 +; GCN-NEXT: 
v_readlane_b32 s7, v1, 24 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 24 -; GCN-NEXT: v_readlane_b32 s1, v1, 25 -; GCN-NEXT: v_readlane_b32 s2, v1, 26 -; GCN-NEXT: v_readlane_b32 s3, v1, 27 -; GCN-NEXT: v_readlane_b32 s4, v1, 28 -; GCN-NEXT: v_readlane_b32 s5, v1, 29 -; GCN-NEXT: v_readlane_b32 s6, v1, 30 -; GCN-NEXT: v_readlane_b32 s7, v1, 31 +; GCN-NEXT: v_readlane_b32 s0, v1, 25 +; GCN-NEXT: v_readlane_b32 s1, v1, 26 +; GCN-NEXT: v_readlane_b32 s2, v1, 27 +; GCN-NEXT: v_readlane_b32 s3, v1, 28 +; GCN-NEXT: v_readlane_b32 s4, v1, 29 +; GCN-NEXT: v_readlane_b32 s5, v1, 30 +; GCN-NEXT: v_readlane_b32 s6, v1, 31 +; GCN-NEXT: v_readlane_b32 s7, v1, 32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 32 -; GCN-NEXT: v_readlane_b32 s1, v1, 33 -; GCN-NEXT: v_readlane_b32 s2, v1, 34 -; GCN-NEXT: v_readlane_b32 s3, v1, 35 -; GCN-NEXT: v_readlane_b32 s4, v1, 36 -; GCN-NEXT: v_readlane_b32 s5, v1, 37 -; GCN-NEXT: v_readlane_b32 s6, v1, 38 -; GCN-NEXT: v_readlane_b32 s7, v1, 39 +; GCN-NEXT: v_readlane_b32 s0, v1, 33 +; GCN-NEXT: v_readlane_b32 s1, v1, 34 +; GCN-NEXT: v_readlane_b32 s2, v1, 35 +; GCN-NEXT: v_readlane_b32 s3, v1, 36 +; GCN-NEXT: v_readlane_b32 s4, v1, 37 +; GCN-NEXT: v_readlane_b32 s5, v1, 38 +; GCN-NEXT: v_readlane_b32 s6, v1, 39 +; GCN-NEXT: v_readlane_b32 s7, v1, 40 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 40 -; GCN-NEXT: v_readlane_b32 s1, v1, 41 -; GCN-NEXT: v_readlane_b32 s2, v1, 42 -; GCN-NEXT: v_readlane_b32 s3, v1, 43 -; GCN-NEXT: v_readlane_b32 s4, v1, 44 -; GCN-NEXT: v_readlane_b32 s5, v1, 45 -; GCN-NEXT: v_readlane_b32 s6, v1, 46 -; GCN-NEXT: v_readlane_b32 s7, v1, 47 +; GCN-NEXT: v_readlane_b32 s0, v1, 41 +; GCN-NEXT: v_readlane_b32 s1, v1, 42 +; GCN-NEXT: v_readlane_b32 s2, v1, 43 +; GCN-NEXT: v_readlane_b32 s3, v1, 44 +; GCN-NEXT: v_readlane_b32 s4, v1, 45 +; 
GCN-NEXT: v_readlane_b32 s5, v1, 46 +; GCN-NEXT: v_readlane_b32 s6, v1, 47 +; GCN-NEXT: v_readlane_b32 s7, v1, 48 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 48 -; GCN-NEXT: v_readlane_b32 s1, v1, 49 -; GCN-NEXT: v_readlane_b32 s2, v1, 50 -; GCN-NEXT: v_readlane_b32 s3, v1, 51 -; GCN-NEXT: v_readlane_b32 s4, v1, 52 -; GCN-NEXT: v_readlane_b32 s5, v1, 53 -; GCN-NEXT: v_readlane_b32 s6, v1, 54 -; GCN-NEXT: v_readlane_b32 s7, v1, 55 +; GCN-NEXT: v_readlane_b32 s0, v1, 49 +; GCN-NEXT: v_readlane_b32 s1, v1, 50 +; GCN-NEXT: v_readlane_b32 s2, v1, 51 +; GCN-NEXT: v_readlane_b32 s3, v1, 52 +; GCN-NEXT: v_readlane_b32 s4, v1, 53 +; GCN-NEXT: v_readlane_b32 s5, v1, 54 +; GCN-NEXT: v_readlane_b32 s6, v1, 55 +; GCN-NEXT: v_readlane_b32 s7, v1, 56 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v1, 56 -; GCN-NEXT: v_readlane_b32 s1, v1, 57 -; GCN-NEXT: v_readlane_b32 s2, v1, 58 -; GCN-NEXT: v_readlane_b32 s3, v1, 59 -; GCN-NEXT: v_readlane_b32 s4, v1, 60 -; GCN-NEXT: v_readlane_b32 s5, v1, 61 -; GCN-NEXT: v_readlane_b32 s6, v1, 62 -; GCN-NEXT: v_readlane_b32 s7, v1, 63 +; GCN-NEXT: v_readlane_b32 s0, v1, 57 +; GCN-NEXT: v_readlane_b32 s1, v1, 58 +; GCN-NEXT: v_readlane_b32 s2, v1, 59 +; GCN-NEXT: v_readlane_b32 s3, v1, 60 +; GCN-NEXT: v_readlane_b32 s4, v1, 61 +; GCN-NEXT: v_readlane_b32 s5, v1, 62 +; GCN-NEXT: v_readlane_b32 s6, v1, 63 +; GCN-NEXT: v_readlane_b32 s7, v2, 0 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 8 -; GCN-NEXT: v_readlane_b32 s1, v0, 9 -; GCN-NEXT: v_readlane_b32 s2, v0, 10 -; GCN-NEXT: v_readlane_b32 s3, v0, 11 -; GCN-NEXT: v_readlane_b32 s4, v0, 12 -; GCN-NEXT: v_readlane_b32 s5, v0, 13 -; GCN-NEXT: v_readlane_b32 s6, v0, 14 -; GCN-NEXT: v_readlane_b32 s7, v0, 15 +; GCN-NEXT: v_readlane_b32 s0, v0, 9 +; GCN-NEXT: v_readlane_b32 s1, v0, 10 +; GCN-NEXT: v_readlane_b32 s2, v0, 11 
+; GCN-NEXT: v_readlane_b32 s3, v0, 12 +; GCN-NEXT: v_readlane_b32 s4, v0, 13 +; GCN-NEXT: v_readlane_b32 s5, v0, 14 +; GCN-NEXT: v_readlane_b32 s6, v0, 15 +; GCN-NEXT: v_readlane_b32 s7, v0, 16 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 16 -; GCN-NEXT: v_readlane_b32 s1, v0, 17 -; GCN-NEXT: v_readlane_b32 s2, v0, 18 -; GCN-NEXT: v_readlane_b32 s3, v0, 19 -; GCN-NEXT: v_readlane_b32 s4, v0, 20 -; GCN-NEXT: v_readlane_b32 s5, v0, 21 -; GCN-NEXT: v_readlane_b32 s6, v0, 22 -; GCN-NEXT: v_readlane_b32 s7, v0, 23 +; GCN-NEXT: v_readlane_b32 s0, v0, 17 +; GCN-NEXT: v_readlane_b32 s1, v0, 18 +; GCN-NEXT: v_readlane_b32 s2, v0, 19 +; GCN-NEXT: v_readlane_b32 s3, v0, 20 +; GCN-NEXT: v_readlane_b32 s4, v0, 21 +; GCN-NEXT: v_readlane_b32 s5, v0, 22 +; GCN-NEXT: v_readlane_b32 s6, v0, 23 +; GCN-NEXT: v_readlane_b32 s7, v0, 24 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 24 -; GCN-NEXT: v_readlane_b32 s1, v0, 25 -; GCN-NEXT: v_readlane_b32 s2, v0, 26 -; GCN-NEXT: v_readlane_b32 s3, v0, 27 -; GCN-NEXT: v_readlane_b32 s4, v0, 28 -; GCN-NEXT: v_readlane_b32 s5, v0, 29 -; GCN-NEXT: v_readlane_b32 s6, v0, 30 -; GCN-NEXT: v_readlane_b32 s7, v0, 31 +; GCN-NEXT: v_readlane_b32 s0, v0, 25 +; GCN-NEXT: v_readlane_b32 s1, v0, 26 +; GCN-NEXT: v_readlane_b32 s2, v0, 27 +; GCN-NEXT: v_readlane_b32 s3, v0, 28 +; GCN-NEXT: v_readlane_b32 s4, v0, 29 +; GCN-NEXT: v_readlane_b32 s5, v0, 30 +; GCN-NEXT: v_readlane_b32 s6, v0, 31 +; GCN-NEXT: v_readlane_b32 s7, v0, 32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 32 -; GCN-NEXT: v_readlane_b32 s1, v0, 33 -; GCN-NEXT: v_readlane_b32 s2, v0, 34 -; GCN-NEXT: v_readlane_b32 s3, v0, 35 -; GCN-NEXT: v_readlane_b32 s4, v0, 36 -; GCN-NEXT: v_readlane_b32 s5, v0, 37 -; GCN-NEXT: v_readlane_b32 s6, v0, 38 -; GCN-NEXT: v_readlane_b32 s7, v0, 39 +; GCN-NEXT: v_readlane_b32 s0, 
v0, 33 +; GCN-NEXT: v_readlane_b32 s1, v0, 34 +; GCN-NEXT: v_readlane_b32 s2, v0, 35 +; GCN-NEXT: v_readlane_b32 s3, v0, 36 +; GCN-NEXT: v_readlane_b32 s4, v0, 37 +; GCN-NEXT: v_readlane_b32 s5, v0, 38 +; GCN-NEXT: v_readlane_b32 s6, v0, 39 +; GCN-NEXT: v_readlane_b32 s7, v0, 40 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 40 -; GCN-NEXT: v_readlane_b32 s1, v0, 41 -; GCN-NEXT: v_readlane_b32 s2, v0, 42 -; GCN-NEXT: v_readlane_b32 s3, v0, 43 -; GCN-NEXT: v_readlane_b32 s4, v0, 44 -; GCN-NEXT: v_readlane_b32 s5, v0, 45 -; GCN-NEXT: v_readlane_b32 s6, v0, 46 -; GCN-NEXT: v_readlane_b32 s7, v0, 47 +; GCN-NEXT: v_readlane_b32 s0, v0, 41 +; GCN-NEXT: v_readlane_b32 s1, v0, 42 +; GCN-NEXT: v_readlane_b32 s2, v0, 43 +; GCN-NEXT: v_readlane_b32 s3, v0, 44 +; GCN-NEXT: v_readlane_b32 s4, v0, 45 +; GCN-NEXT: v_readlane_b32 s5, v0, 46 +; GCN-NEXT: v_readlane_b32 s6, v0, 47 +; GCN-NEXT: v_readlane_b32 s7, v0, 48 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 48 -; GCN-NEXT: v_readlane_b32 s1, v0, 49 -; GCN-NEXT: v_readlane_b32 s2, v0, 50 -; GCN-NEXT: v_readlane_b32 s3, v0, 51 -; GCN-NEXT: v_readlane_b32 s4, v0, 52 -; GCN-NEXT: v_readlane_b32 s5, v0, 53 -; GCN-NEXT: v_readlane_b32 s6, v0, 54 -; GCN-NEXT: v_readlane_b32 s7, v0, 55 +; GCN-NEXT: v_readlane_b32 s0, v0, 49 +; GCN-NEXT: v_readlane_b32 s1, v0, 50 +; GCN-NEXT: v_readlane_b32 s2, v0, 51 +; GCN-NEXT: v_readlane_b32 s3, v0, 52 +; GCN-NEXT: v_readlane_b32 s4, v0, 53 +; GCN-NEXT: v_readlane_b32 s5, v0, 54 +; GCN-NEXT: v_readlane_b32 s6, v0, 55 +; GCN-NEXT: v_readlane_b32 s7, v0, 56 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v2, 0 -; GCN-NEXT: v_readlane_b32 s1, v2, 1 -; GCN-NEXT: v_readlane_b32 s2, v2, 2 -; GCN-NEXT: v_readlane_b32 s3, v2, 3 -; GCN-NEXT: v_readlane_b32 s4, v2, 4 -; GCN-NEXT: v_readlane_b32 s5, v2, 5 -; GCN-NEXT: v_readlane_b32 
s6, v2, 6 -; GCN-NEXT: v_readlane_b32 s7, v2, 7 +; GCN-NEXT: v_readlane_b32 s0, v2, 1 +; GCN-NEXT: v_readlane_b32 s1, v2, 2 +; GCN-NEXT: v_readlane_b32 s2, v2, 3 +; GCN-NEXT: v_readlane_b32 s3, v2, 4 +; GCN-NEXT: v_readlane_b32 s4, v2, 5 +; GCN-NEXT: v_readlane_b32 s5, v2, 6 +; GCN-NEXT: v_readlane_b32 s6, v2, 7 +; GCN-NEXT: v_readlane_b32 s7, v2, 8 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:7] ; GCN-NEXT: ;;#ASMEND @@ -442,193 +444,195 @@ define amdgpu_kernel void @split_sgpr_spill_2_vgprs(i32 addrspace(1)* %out, i32 %in) #1 { ; GCN-LABEL: split_sgpr_spill_2_vgprs: ; GCN: ; %bb.0: -; GCN-NEXT: s_load_dword s2, s[0:1], 0xb +; GCN-NEXT: s_load_dword s0, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[4:19] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[36:51] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 0 -; GCN-NEXT: v_writelane_b32 v0, s5, 1 -; GCN-NEXT: v_writelane_b32 v0, s6, 2 -; GCN-NEXT: v_writelane_b32 v0, s7, 3 -; GCN-NEXT: v_writelane_b32 v0, s8, 4 -; GCN-NEXT: v_writelane_b32 v0, s9, 5 -; GCN-NEXT: v_writelane_b32 v0, s10, 6 -; GCN-NEXT: v_writelane_b32 v0, s11, 7 -; GCN-NEXT: v_writelane_b32 v0, s12, 8 -; GCN-NEXT: v_writelane_b32 v0, s13, 9 -; GCN-NEXT: v_writelane_b32 v0, s14, 10 -; GCN-NEXT: v_writelane_b32 v0, s15, 11 -; GCN-NEXT: v_writelane_b32 v0, s16, 12 -; GCN-NEXT: v_writelane_b32 v0, s17, 13 -; GCN-NEXT: v_writelane_b32 v0, s18, 14 -; GCN-NEXT: v_writelane_b32 v0, s19, 15 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:19] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v0, s4, 16 -; GCN-NEXT: v_writelane_b32 v0, s5, 17 -; GCN-NEXT: v_writelane_b32 v0, s6, 18 -; GCN-NEXT: v_writelane_b32 v0, s7, 19 -; GCN-NEXT: v_writelane_b32 v0, s8, 20 -; GCN-NEXT: v_writelane_b32 v0, s9, 21 -; GCN-NEXT: v_writelane_b32 v0, s10, 22 -; GCN-NEXT: v_writelane_b32 v0, s11, 23 -; GCN-NEXT: v_writelane_b32 v0, s12, 24 -; GCN-NEXT: v_writelane_b32 v0, s13, 25 -; GCN-NEXT: v_writelane_b32 v0, s14, 26 -; GCN-NEXT: 
v_writelane_b32 v0, s15, 27 -; GCN-NEXT: v_writelane_b32 v0, s16, 28 -; GCN-NEXT: v_writelane_b32 v0, s17, 29 -; GCN-NEXT: v_writelane_b32 v0, s18, 30 -; GCN-NEXT: v_writelane_b32 v0, s19, 31 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:19] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[20:27] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:1] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_cmp_lg_u32 s2, s3 -; GCN-NEXT: v_writelane_b32 v0, s36, 32 -; GCN-NEXT: v_writelane_b32 v0, s37, 33 -; GCN-NEXT: v_writelane_b32 v0, s38, 34 -; GCN-NEXT: v_writelane_b32 v0, s39, 35 -; GCN-NEXT: v_writelane_b32 v0, s40, 36 -; GCN-NEXT: v_writelane_b32 v0, s41, 37 -; GCN-NEXT: v_writelane_b32 v0, s42, 38 -; GCN-NEXT: v_writelane_b32 v0, s43, 39 -; GCN-NEXT: v_writelane_b32 v0, s44, 40 -; GCN-NEXT: v_writelane_b32 v0, s45, 41 -; GCN-NEXT: v_writelane_b32 v0, s46, 42 -; GCN-NEXT: v_writelane_b32 v0, s47, 43 -; GCN-NEXT: v_writelane_b32 v0, s48, 44 -; GCN-NEXT: v_writelane_b32 v0, s49, 45 -; GCN-NEXT: v_writelane_b32 v0, s50, 46 -; GCN-NEXT: v_writelane_b32 v0, s51, 47 -; GCN-NEXT: v_writelane_b32 v0, s4, 48 -; GCN-NEXT: v_writelane_b32 v0, s5, 49 -; GCN-NEXT: v_writelane_b32 v0, s6, 50 -; GCN-NEXT: v_writelane_b32 v0, s7, 51 -; GCN-NEXT: v_writelane_b32 v0, s8, 52 -; GCN-NEXT: v_writelane_b32 v0, s9, 53 -; GCN-NEXT: v_writelane_b32 v0, s10, 54 -; GCN-NEXT: v_writelane_b32 v0, s11, 55 -; GCN-NEXT: v_writelane_b32 v0, s12, 56 -; GCN-NEXT: v_writelane_b32 v0, s13, 57 -; GCN-NEXT: v_writelane_b32 v0, s14, 58 -; GCN-NEXT: v_writelane_b32 v0, s15, 59 -; GCN-NEXT: v_writelane_b32 v0, s16, 60 -; GCN-NEXT: v_writelane_b32 v0, s17, 61 -; GCN-NEXT: v_writelane_b32 v0, s18, 62 -; GCN-NEXT: v_writelane_b32 v0, s19, 63 -; GCN-NEXT: v_writelane_b32 v1, s20, 0 -; GCN-NEXT: v_writelane_b32 v1, s21, 1 -; GCN-NEXT: v_writelane_b32 v1, s22, 2 -; GCN-NEXT: v_writelane_b32 v1, s23, 3 -; GCN-NEXT: 
v_writelane_b32 v1, s24, 4 -; GCN-NEXT: v_writelane_b32 v1, s25, 5 -; GCN-NEXT: v_writelane_b32 v1, s26, 6 -; GCN-NEXT: v_writelane_b32 v1, s27, 7 -; GCN-NEXT: v_writelane_b32 v1, s0, 8 -; GCN-NEXT: v_writelane_b32 v1, s1, 9 +; GCN-NEXT: v_writelane_b32 v0, s0, 0 +; GCN-NEXT: v_writelane_b32 v0, s4, 1 +; GCN-NEXT: v_writelane_b32 v0, s5, 2 +; GCN-NEXT: v_writelane_b32 v0, s6, 3 +; GCN-NEXT: v_writelane_b32 v0, s7, 4 +; GCN-NEXT: v_writelane_b32 v0, s8, 5 +; GCN-NEXT: v_writelane_b32 v0, s9, 6 +; GCN-NEXT: v_writelane_b32 v0, s10, 7 +; GCN-NEXT: v_writelane_b32 v0, s11, 8 +; GCN-NEXT: v_writelane_b32 v0, s12, 9 +; GCN-NEXT: v_writelane_b32 v0, s13, 10 +; GCN-NEXT: v_writelane_b32 v0, s14, 11 +; GCN-NEXT: v_writelane_b32 v0, s15, 12 +; GCN-NEXT: v_writelane_b32 v0, s16, 13 +; GCN-NEXT: v_writelane_b32 v0, s17, 14 +; GCN-NEXT: v_writelane_b32 v0, s18, 15 +; GCN-NEXT: v_writelane_b32 v0, s19, 16 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:15] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[16:31] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_writelane_b32 v0, s0, 17 +; GCN-NEXT: v_writelane_b32 v0, s1, 18 +; GCN-NEXT: v_writelane_b32 v0, s2, 19 +; GCN-NEXT: v_writelane_b32 v0, s3, 20 +; GCN-NEXT: v_writelane_b32 v0, s4, 21 +; GCN-NEXT: v_writelane_b32 v0, s5, 22 +; GCN-NEXT: v_writelane_b32 v0, s6, 23 +; GCN-NEXT: v_writelane_b32 v0, s7, 24 +; GCN-NEXT: v_writelane_b32 v0, s8, 25 +; GCN-NEXT: v_writelane_b32 v0, s9, 26 +; GCN-NEXT: v_writelane_b32 v0, s10, 27 +; GCN-NEXT: v_writelane_b32 v0, s11, 28 +; GCN-NEXT: v_writelane_b32 v0, s12, 29 +; GCN-NEXT: v_writelane_b32 v0, s13, 30 +; GCN-NEXT: v_writelane_b32 v0, s14, 31 +; GCN-NEXT: v_writelane_b32 v0, s15, 32 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[8:9] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s10, 0 +; GCN-NEXT: v_readlane_b32 s11, v0, 0 +; GCN-NEXT: s_cmp_lg_u32 s11, s10 +; GCN-NEXT: v_writelane_b32 v0, s36, 33 +; 
GCN-NEXT: v_writelane_b32 v0, s37, 34 +; GCN-NEXT: v_writelane_b32 v0, s38, 35 +; GCN-NEXT: v_writelane_b32 v0, s39, 36 +; GCN-NEXT: v_writelane_b32 v0, s40, 37 +; GCN-NEXT: v_writelane_b32 v0, s41, 38 +; GCN-NEXT: v_writelane_b32 v0, s42, 39 +; GCN-NEXT: v_writelane_b32 v0, s43, 40 +; GCN-NEXT: v_writelane_b32 v0, s44, 41 +; GCN-NEXT: v_writelane_b32 v0, s45, 42 +; GCN-NEXT: v_writelane_b32 v0, s46, 43 +; GCN-NEXT: v_writelane_b32 v0, s47, 44 +; GCN-NEXT: v_writelane_b32 v0, s48, 45 +; GCN-NEXT: v_writelane_b32 v0, s49, 46 +; GCN-NEXT: v_writelane_b32 v0, s50, 47 +; GCN-NEXT: v_writelane_b32 v0, s51, 48 +; GCN-NEXT: v_writelane_b32 v0, s16, 49 +; GCN-NEXT: v_writelane_b32 v0, s17, 50 +; GCN-NEXT: v_writelane_b32 v0, s18, 51 +; GCN-NEXT: v_writelane_b32 v0, s19, 52 +; GCN-NEXT: v_writelane_b32 v0, s20, 53 +; GCN-NEXT: v_writelane_b32 v0, s21, 54 +; GCN-NEXT: v_writelane_b32 v0, s22, 55 +; GCN-NEXT: v_writelane_b32 v0, s23, 56 +; GCN-NEXT: v_writelane_b32 v0, s24, 57 +; GCN-NEXT: v_writelane_b32 v0, s25, 58 +; GCN-NEXT: v_writelane_b32 v0, s26, 59 +; GCN-NEXT: v_writelane_b32 v0, s27, 60 +; GCN-NEXT: v_writelane_b32 v0, s28, 61 +; GCN-NEXT: v_writelane_b32 v0, s29, 62 +; GCN-NEXT: v_writelane_b32 v0, s30, 63 +; GCN-NEXT: v_writelane_b32 v1, s31, 0 +; GCN-NEXT: v_writelane_b32 v1, s0, 1 +; GCN-NEXT: v_writelane_b32 v1, s1, 2 +; GCN-NEXT: v_writelane_b32 v1, s2, 3 +; GCN-NEXT: v_writelane_b32 v1, s3, 4 +; GCN-NEXT: v_writelane_b32 v1, s4, 5 +; GCN-NEXT: v_writelane_b32 v1, s5, 6 +; GCN-NEXT: v_writelane_b32 v1, s6, 7 +; GCN-NEXT: v_writelane_b32 v1, s7, 8 +; GCN-NEXT: v_writelane_b32 v1, s8, 9 +; GCN-NEXT: v_writelane_b32 v1, s9, 10 ; GCN-NEXT: s_cbranch_scc1 BB1_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v0, 0 -; GCN-NEXT: v_readlane_b32 s1, v0, 1 -; GCN-NEXT: v_readlane_b32 s2, v0, 2 -; GCN-NEXT: v_readlane_b32 s3, v0, 3 -; GCN-NEXT: v_readlane_b32 s4, v0, 4 -; GCN-NEXT: v_readlane_b32 s5, v0, 5 -; GCN-NEXT: v_readlane_b32 s6, v0, 6 -; GCN-NEXT: 
v_readlane_b32 s7, v0, 7 -; GCN-NEXT: v_readlane_b32 s8, v0, 8 -; GCN-NEXT: v_readlane_b32 s9, v0, 9 -; GCN-NEXT: v_readlane_b32 s10, v0, 10 -; GCN-NEXT: v_readlane_b32 s11, v0, 11 -; GCN-NEXT: v_readlane_b32 s12, v0, 12 -; GCN-NEXT: v_readlane_b32 s13, v0, 13 -; GCN-NEXT: v_readlane_b32 s14, v0, 14 -; GCN-NEXT: v_readlane_b32 s15, v0, 15 +; GCN-NEXT: v_readlane_b32 s0, v0, 1 +; GCN-NEXT: v_readlane_b32 s1, v0, 2 +; GCN-NEXT: v_readlane_b32 s2, v0, 3 +; GCN-NEXT: v_readlane_b32 s3, v0, 4 +; GCN-NEXT: v_readlane_b32 s4, v0, 5 +; GCN-NEXT: v_readlane_b32 s5, v0, 6 +; GCN-NEXT: v_readlane_b32 s6, v0, 7 +; GCN-NEXT: v_readlane_b32 s7, v0, 8 +; GCN-NEXT: v_readlane_b32 s8, v0, 9 +; GCN-NEXT: v_readlane_b32 s9, v0, 10 +; GCN-NEXT: v_readlane_b32 s10, v0, 11 +; GCN-NEXT: v_readlane_b32 s11, v0, 12 +; GCN-NEXT: v_readlane_b32 s12, v0, 13 +; GCN-NEXT: v_readlane_b32 s13, v0, 14 +; GCN-NEXT: v_readlane_b32 s14, v0, 15 +; GCN-NEXT: v_readlane_b32 s15, v0, 16 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 32 -; GCN-NEXT: v_readlane_b32 s1, v0, 33 -; GCN-NEXT: v_readlane_b32 s2, v0, 34 -; GCN-NEXT: v_readlane_b32 s3, v0, 35 -; GCN-NEXT: v_readlane_b32 s4, v0, 36 -; GCN-NEXT: v_readlane_b32 s5, v0, 37 -; GCN-NEXT: v_readlane_b32 s6, v0, 38 -; GCN-NEXT: v_readlane_b32 s7, v0, 39 -; GCN-NEXT: v_readlane_b32 s8, v0, 40 -; GCN-NEXT: v_readlane_b32 s9, v0, 41 -; GCN-NEXT: v_readlane_b32 s10, v0, 42 -; GCN-NEXT: v_readlane_b32 s11, v0, 43 -; GCN-NEXT: v_readlane_b32 s12, v0, 44 -; GCN-NEXT: v_readlane_b32 s13, v0, 45 -; GCN-NEXT: v_readlane_b32 s14, v0, 46 -; GCN-NEXT: v_readlane_b32 s15, v0, 47 +; GCN-NEXT: v_readlane_b32 s0, v0, 33 +; GCN-NEXT: v_readlane_b32 s1, v0, 34 +; GCN-NEXT: v_readlane_b32 s2, v0, 35 +; GCN-NEXT: v_readlane_b32 s3, v0, 36 +; GCN-NEXT: v_readlane_b32 s4, v0, 37 +; GCN-NEXT: v_readlane_b32 s5, v0, 38 +; GCN-NEXT: v_readlane_b32 s6, v0, 39 +; GCN-NEXT: v_readlane_b32 s7, v0, 40 +; GCN-NEXT: 
v_readlane_b32 s8, v0, 41 +; GCN-NEXT: v_readlane_b32 s9, v0, 42 +; GCN-NEXT: v_readlane_b32 s10, v0, 43 +; GCN-NEXT: v_readlane_b32 s11, v0, 44 +; GCN-NEXT: v_readlane_b32 s12, v0, 45 +; GCN-NEXT: v_readlane_b32 s13, v0, 46 +; GCN-NEXT: v_readlane_b32 s14, v0, 47 +; GCN-NEXT: v_readlane_b32 s15, v0, 48 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 16 -; GCN-NEXT: v_readlane_b32 s1, v0, 17 -; GCN-NEXT: v_readlane_b32 s2, v0, 18 -; GCN-NEXT: v_readlane_b32 s3, v0, 19 -; GCN-NEXT: v_readlane_b32 s4, v0, 20 -; GCN-NEXT: v_readlane_b32 s5, v0, 21 -; GCN-NEXT: v_readlane_b32 s6, v0, 22 -; GCN-NEXT: v_readlane_b32 s7, v0, 23 -; GCN-NEXT: v_readlane_b32 s8, v0, 24 -; GCN-NEXT: v_readlane_b32 s9, v0, 25 -; GCN-NEXT: v_readlane_b32 s10, v0, 26 -; GCN-NEXT: v_readlane_b32 s11, v0, 27 -; GCN-NEXT: v_readlane_b32 s12, v0, 28 -; GCN-NEXT: v_readlane_b32 s13, v0, 29 -; GCN-NEXT: v_readlane_b32 s14, v0, 30 -; GCN-NEXT: v_readlane_b32 s15, v0, 31 +; GCN-NEXT: v_readlane_b32 s0, v0, 17 +; GCN-NEXT: v_readlane_b32 s1, v0, 18 +; GCN-NEXT: v_readlane_b32 s2, v0, 19 +; GCN-NEXT: v_readlane_b32 s3, v0, 20 +; GCN-NEXT: v_readlane_b32 s4, v0, 21 +; GCN-NEXT: v_readlane_b32 s5, v0, 22 +; GCN-NEXT: v_readlane_b32 s6, v0, 23 +; GCN-NEXT: v_readlane_b32 s7, v0, 24 +; GCN-NEXT: v_readlane_b32 s8, v0, 25 +; GCN-NEXT: v_readlane_b32 s9, v0, 26 +; GCN-NEXT: v_readlane_b32 s10, v0, 27 +; GCN-NEXT: v_readlane_b32 s11, v0, 28 +; GCN-NEXT: v_readlane_b32 s12, v0, 29 +; GCN-NEXT: v_readlane_b32 s13, v0, 30 +; GCN-NEXT: v_readlane_b32 s14, v0, 31 +; GCN-NEXT: v_readlane_b32 s15, v0, 32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s16, v1, 0 -; GCN-NEXT: v_readlane_b32 s17, v1, 1 -; GCN-NEXT: v_readlane_b32 s18, v1, 2 -; GCN-NEXT: v_readlane_b32 s19, v1, 3 -; GCN-NEXT: v_readlane_b32 s20, v1, 4 -; GCN-NEXT: v_readlane_b32 s21, v1, 5 -; GCN-NEXT: v_readlane_b32 s22, v1, 6 -; GCN-NEXT: 
v_readlane_b32 s23, v1, 7 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[16:23] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s24, v1, 8 -; GCN-NEXT: v_readlane_b32 s25, v1, 9 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[24:25] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v0, 48 -; GCN-NEXT: v_readlane_b32 s1, v0, 49 -; GCN-NEXT: v_readlane_b32 s2, v0, 50 -; GCN-NEXT: v_readlane_b32 s3, v0, 51 -; GCN-NEXT: v_readlane_b32 s4, v0, 52 -; GCN-NEXT: v_readlane_b32 s5, v0, 53 -; GCN-NEXT: v_readlane_b32 s6, v0, 54 -; GCN-NEXT: v_readlane_b32 s7, v0, 55 -; GCN-NEXT: v_readlane_b32 s8, v0, 56 -; GCN-NEXT: v_readlane_b32 s9, v0, 57 -; GCN-NEXT: v_readlane_b32 s10, v0, 58 -; GCN-NEXT: v_readlane_b32 s11, v0, 59 -; GCN-NEXT: v_readlane_b32 s12, v0, 60 -; GCN-NEXT: v_readlane_b32 s13, v0, 61 -; GCN-NEXT: v_readlane_b32 s14, v0, 62 -; GCN-NEXT: v_readlane_b32 s15, v0, 63 +; GCN-NEXT: v_readlane_b32 s0, v1, 1 +; GCN-NEXT: v_readlane_b32 s1, v1, 2 +; GCN-NEXT: v_readlane_b32 s2, v1, 3 +; GCN-NEXT: v_readlane_b32 s3, v1, 4 +; GCN-NEXT: v_readlane_b32 s4, v1, 5 +; GCN-NEXT: v_readlane_b32 s5, v1, 6 +; GCN-NEXT: v_readlane_b32 s6, v1, 7 +; GCN-NEXT: v_readlane_b32 s7, v1, 8 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[0:7] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s0, v1, 9 +; GCN-NEXT: v_readlane_b32 s1, v1, 10 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; use s[0:1] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: v_readlane_b32 s0, v0, 49 +; GCN-NEXT: v_readlane_b32 s1, v0, 50 +; GCN-NEXT: v_readlane_b32 s2, v0, 51 +; GCN-NEXT: v_readlane_b32 s3, v0, 52 +; GCN-NEXT: v_readlane_b32 s4, v0, 53 +; GCN-NEXT: v_readlane_b32 s5, v0, 54 +; GCN-NEXT: v_readlane_b32 s6, v0, 55 +; GCN-NEXT: v_readlane_b32 s7, v0, 56 +; GCN-NEXT: v_readlane_b32 s8, v0, 57 +; GCN-NEXT: v_readlane_b32 s9, v0, 58 +; GCN-NEXT: v_readlane_b32 s10, v0, 59 +; GCN-NEXT: v_readlane_b32 s11, v0, 60 +; GCN-NEXT: v_readlane_b32 s12, v0, 61 +; GCN-NEXT: v_readlane_b32 s13, v0, 62 +; GCN-NEXT: v_readlane_b32 s14, 
v0, 63 +; GCN-NEXT: v_readlane_b32 s15, v1, 0 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND @@ -663,13 +667,13 @@ define amdgpu_kernel void @no_vgprs_last_sgpr_spill(i32 addrspace(1)* %out, i32 %in) #1 { ; GCN-LABEL: no_vgprs_last_sgpr_spill: ; GCN: ; %bb.0: -; GCN-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0 -; GCN-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1 -; GCN-NEXT: s_mov_b32 s22, -1 -; GCN-NEXT: s_mov_b32 s23, 0xe8f000 -; GCN-NEXT: s_add_u32 s20, s20, s3 -; GCN-NEXT: s_addc_u32 s21, s21, 0 -; GCN-NEXT: s_load_dword s2, s[0:1], 0xb +; GCN-NEXT: s_mov_b32 s56, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s57, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s58, -1 +; GCN-NEXT: s_mov_b32 s59, 0xe8f000 +; GCN-NEXT: s_add_u32 s56, s56, s3 +; GCN-NEXT: s_addc_u32 s57, s57, 0 +; GCN-NEXT: s_load_dword s0, s[0:1], 0xb ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: ;;#ASMSTART @@ -688,177 +692,179 @@ ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; def s[36:51] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 0 -; GCN-NEXT: v_writelane_b32 v31, s5, 1 -; GCN-NEXT: v_writelane_b32 v31, s6, 2 -; GCN-NEXT: v_writelane_b32 v31, s7, 3 -; GCN-NEXT: v_writelane_b32 v31, s8, 4 -; GCN-NEXT: v_writelane_b32 v31, s9, 5 -; GCN-NEXT: v_writelane_b32 v31, s10, 6 -; GCN-NEXT: v_writelane_b32 v31, s11, 7 -; GCN-NEXT: v_writelane_b32 v31, s12, 8 -; GCN-NEXT: v_writelane_b32 v31, s13, 9 -; GCN-NEXT: v_writelane_b32 v31, s14, 10 -; GCN-NEXT: v_writelane_b32 v31, s15, 11 -; GCN-NEXT: v_writelane_b32 v31, s16, 12 -; GCN-NEXT: v_writelane_b32 v31, s17, 13 -; GCN-NEXT: v_writelane_b32 v31, s18, 14 -; GCN-NEXT: v_writelane_b32 v31, s19, 15 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:19] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_writelane_b32 v31, s4, 16 -; GCN-NEXT: v_writelane_b32 v31, s5, 17 -; GCN-NEXT: v_writelane_b32 v31, s6, 18 -; GCN-NEXT: v_writelane_b32 v31, s7, 19 -; GCN-NEXT: v_writelane_b32 v31, s8, 20 -; GCN-NEXT: v_writelane_b32 v31, s9, 21 -; GCN-NEXT: 
v_writelane_b32 v31, s10, 22 -; GCN-NEXT: v_writelane_b32 v31, s11, 23 -; GCN-NEXT: v_writelane_b32 v31, s12, 24 -; GCN-NEXT: v_writelane_b32 v31, s13, 25 -; GCN-NEXT: v_writelane_b32 v31, s14, 26 -; GCN-NEXT: v_writelane_b32 v31, s15, 27 -; GCN-NEXT: v_writelane_b32 v31, s16, 28 -; GCN-NEXT: v_writelane_b32 v31, s17, 29 -; GCN-NEXT: v_writelane_b32 v31, s18, 30 -; GCN-NEXT: v_writelane_b32 v31, s19, 31 -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[4:19] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; def s[0:1] -; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b32 s3, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_cmp_lg_u32 s2, s3 -; GCN-NEXT: v_writelane_b32 v31, s36, 32 -; GCN-NEXT: v_writelane_b32 v31, s37, 33 -; GCN-NEXT: v_writelane_b32 v31, s38, 34 -; GCN-NEXT: v_writelane_b32 v31, s39, 35 -; GCN-NEXT: v_writelane_b32 v31, s40, 36 -; GCN-NEXT: v_writelane_b32 v31, s41, 37 -; GCN-NEXT: v_writelane_b32 v31, s42, 38 -; GCN-NEXT: v_writelane_b32 v31, s43, 39 -; GCN-NEXT: v_writelane_b32 v31, s44, 40 -; GCN-NEXT: v_writelane_b32 v31, s45, 41 -; GCN-NEXT: v_writelane_b32 v31, s46, 42 -; GCN-NEXT: v_writelane_b32 v31, s47, 43 -; GCN-NEXT: v_writelane_b32 v31, s48, 44 -; GCN-NEXT: v_writelane_b32 v31, s49, 45 -; GCN-NEXT: v_writelane_b32 v31, s50, 46 -; GCN-NEXT: v_writelane_b32 v31, s51, 47 -; GCN-NEXT: v_writelane_b32 v31, s4, 48 -; GCN-NEXT: v_writelane_b32 v31, s5, 49 -; GCN-NEXT: v_writelane_b32 v31, s6, 50 -; GCN-NEXT: v_writelane_b32 v31, s7, 51 -; GCN-NEXT: v_writelane_b32 v31, s8, 52 -; GCN-NEXT: v_writelane_b32 v31, s9, 53 -; GCN-NEXT: v_writelane_b32 v31, s10, 54 -; GCN-NEXT: v_writelane_b32 v31, s11, 55 -; GCN-NEXT: v_writelane_b32 v31, s12, 56 -; GCN-NEXT: v_writelane_b32 v31, s13, 57 -; GCN-NEXT: v_writelane_b32 v31, s14, 58 -; GCN-NEXT: v_writelane_b32 v31, s15, 59 -; GCN-NEXT: v_writelane_b32 v31, s16, 60 -; GCN-NEXT: v_writelane_b32 v31, s17, 61 -; GCN-NEXT: v_writelane_b32 v31, s18, 62 -; GCN-NEXT: v_writelane_b32 v31, s19, 63 -; 
GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 -; GCN-NEXT: v_writelane_b32 v0, s0, 0 -; GCN-NEXT: v_writelane_b32 v0, s1, 1 -; GCN-NEXT: s_mov_b64 s[0:1], exec -; GCN-NEXT: s_mov_b64 exec, 3 -; GCN-NEXT: buffer_store_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Spill -; GCN-NEXT: s_mov_b64 exec, s[0:1] -; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 +; GCN-NEXT: v_writelane_b32 v31, s0, 0 +; GCN-NEXT: v_writelane_b32 v31, s4, 1 +; GCN-NEXT: v_writelane_b32 v31, s5, 2 +; GCN-NEXT: v_writelane_b32 v31, s6, 3 +; GCN-NEXT: v_writelane_b32 v31, s7, 4 +; GCN-NEXT: v_writelane_b32 v31, s8, 5 +; GCN-NEXT: v_writelane_b32 v31, s9, 6 +; GCN-NEXT: v_writelane_b32 v31, s10, 7 +; GCN-NEXT: v_writelane_b32 v31, s11, 8 +; GCN-NEXT: v_writelane_b32 v31, s12, 9 +; GCN-NEXT: v_writelane_b32 v31, s13, 10 +; GCN-NEXT: v_writelane_b32 v31, s14, 11 +; GCN-NEXT: v_writelane_b32 v31, s15, 12 +; GCN-NEXT: v_writelane_b32 v31, s16, 13 +; GCN-NEXT: v_writelane_b32 v31, s17, 14 +; GCN-NEXT: v_writelane_b32 v31, s18, 15 +; GCN-NEXT: v_writelane_b32 v31, s19, 16 +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[0:15] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[16:31] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: ;;#ASMSTART +; GCN-NEXT: ; def s[34:35] +; GCN-NEXT: ;;#ASMEND +; GCN-NEXT: s_mov_b32 s33, 0 +; GCN-NEXT: v_readlane_b32 s52, v31, 0 +; GCN-NEXT: s_cmp_lg_u32 s52, s33 +; GCN-NEXT: v_writelane_b32 v31, s36, 17 +; GCN-NEXT: v_writelane_b32 v31, s37, 18 +; GCN-NEXT: v_writelane_b32 v31, s38, 19 +; GCN-NEXT: v_writelane_b32 v31, s39, 20 +; GCN-NEXT: v_writelane_b32 v31, s40, 21 +; GCN-NEXT: v_writelane_b32 v31, s41, 22 +; GCN-NEXT: v_writelane_b32 v31, s42, 23 +; GCN-NEXT: v_writelane_b32 v31, s43, 24 +; GCN-NEXT: v_writelane_b32 v31, s44, 25 +; GCN-NEXT: v_writelane_b32 v31, s45, 26 +; GCN-NEXT: v_writelane_b32 v31, s46, 27 +; GCN-NEXT: v_writelane_b32 v31, s47, 28 +; GCN-NEXT: v_writelane_b32 v31, s48, 29 +; GCN-NEXT: v_writelane_b32 v31, s49, 30 +; GCN-NEXT: 
v_writelane_b32 v31, s50, 31 +; GCN-NEXT: v_writelane_b32 v31, s51, 32 +; GCN-NEXT: v_writelane_b32 v31, s0, 33 +; GCN-NEXT: v_writelane_b32 v31, s1, 34 +; GCN-NEXT: v_writelane_b32 v31, s2, 35 +; GCN-NEXT: v_writelane_b32 v31, s3, 36 +; GCN-NEXT: v_writelane_b32 v31, s4, 37 +; GCN-NEXT: v_writelane_b32 v31, s5, 38 +; GCN-NEXT: v_writelane_b32 v31, s6, 39 +; GCN-NEXT: v_writelane_b32 v31, s7, 40 +; GCN-NEXT: v_writelane_b32 v31, s8, 41 +; GCN-NEXT: v_writelane_b32 v31, s9, 42 +; GCN-NEXT: v_writelane_b32 v31, s10, 43 +; GCN-NEXT: v_writelane_b32 v31, s11, 44 +; GCN-NEXT: v_writelane_b32 v31, s12, 45 +; GCN-NEXT: v_writelane_b32 v31, s13, 46 +; GCN-NEXT: v_writelane_b32 v31, s14, 47 +; GCN-NEXT: v_writelane_b32 v31, s15, 48 +; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 +; GCN-NEXT: v_writelane_b32 v0, s16, 0 +; GCN-NEXT: v_writelane_b32 v0, s17, 1 +; GCN-NEXT: v_writelane_b32 v0, s18, 2 +; GCN-NEXT: v_writelane_b32 v0, s19, 3 +; GCN-NEXT: v_writelane_b32 v0, s20, 4 +; GCN-NEXT: v_writelane_b32 v0, s21, 5 +; GCN-NEXT: v_writelane_b32 v0, s22, 6 +; GCN-NEXT: v_writelane_b32 v0, s23, 7 +; GCN-NEXT: v_writelane_b32 v0, s24, 8 +; GCN-NEXT: v_writelane_b32 v0, s25, 9 +; GCN-NEXT: v_writelane_b32 v0, s26, 10 +; GCN-NEXT: v_writelane_b32 v0, s27, 11 +; GCN-NEXT: v_writelane_b32 v0, s28, 12 +; GCN-NEXT: v_writelane_b32 v0, s29, 13 +; GCN-NEXT: v_writelane_b32 v0, s30, 14 +; GCN-NEXT: v_writelane_b32 v0, s31, 15 +; GCN-NEXT: s_mov_b64 s[16:17], exec +; GCN-NEXT: s_mov_b64 exec, 0xffff +; GCN-NEXT: buffer_store_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Spill +; GCN-NEXT: s_mov_b64 exec, s[16:17] +; GCN-NEXT: v_writelane_b32 v31, s34, 49 +; GCN-NEXT: v_writelane_b32 v31, s35, 50 +; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 ; GCN-NEXT: s_cbranch_scc1 BB2_2 ; GCN-NEXT: ; %bb.1: ; %bb0 -; GCN-NEXT: v_readlane_b32 s0, v31, 0 -; GCN-NEXT: v_readlane_b32 s1, v31, 1 -; GCN-NEXT: v_readlane_b32 s2, v31, 2 -; GCN-NEXT: v_readlane_b32 s3, v31, 3 -; GCN-NEXT: 
v_readlane_b32 s4, v31, 4 -; GCN-NEXT: v_readlane_b32 s5, v31, 5 -; GCN-NEXT: v_readlane_b32 s6, v31, 6 -; GCN-NEXT: v_readlane_b32 s7, v31, 7 -; GCN-NEXT: v_readlane_b32 s8, v31, 8 -; GCN-NEXT: v_readlane_b32 s9, v31, 9 -; GCN-NEXT: v_readlane_b32 s10, v31, 10 -; GCN-NEXT: v_readlane_b32 s11, v31, 11 -; GCN-NEXT: v_readlane_b32 s12, v31, 12 -; GCN-NEXT: v_readlane_b32 s13, v31, 13 -; GCN-NEXT: v_readlane_b32 s14, v31, 14 -; GCN-NEXT: v_readlane_b32 s15, v31, 15 +; GCN-NEXT: v_readlane_b32 s0, v31, 1 +; GCN-NEXT: v_readlane_b32 s1, v31, 2 +; GCN-NEXT: v_readlane_b32 s2, v31, 3 +; GCN-NEXT: v_readlane_b32 s3, v31, 4 +; GCN-NEXT: v_readlane_b32 s4, v31, 5 +; GCN-NEXT: v_readlane_b32 s5, v31, 6 +; GCN-NEXT: v_readlane_b32 s6, v31, 7 +; GCN-NEXT: v_readlane_b32 s7, v31, 8 +; GCN-NEXT: v_readlane_b32 s8, v31, 9 +; GCN-NEXT: v_readlane_b32 s9, v31, 10 +; GCN-NEXT: v_readlane_b32 s10, v31, 11 +; GCN-NEXT: v_readlane_b32 s11, v31, 12 +; GCN-NEXT: v_readlane_b32 s12, v31, 13 +; GCN-NEXT: v_readlane_b32 s13, v31, 14 +; GCN-NEXT: v_readlane_b32 s14, v31, 15 +; GCN-NEXT: v_readlane_b32 s15, v31, 16 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 32 -; GCN-NEXT: v_readlane_b32 s1, v31, 33 -; GCN-NEXT: v_readlane_b32 s2, v31, 34 -; GCN-NEXT: v_readlane_b32 s3, v31, 35 -; GCN-NEXT: v_readlane_b32 s4, v31, 36 -; GCN-NEXT: v_readlane_b32 s5, v31, 37 -; GCN-NEXT: v_readlane_b32 s6, v31, 38 -; GCN-NEXT: v_readlane_b32 s7, v31, 39 -; GCN-NEXT: v_readlane_b32 s8, v31, 40 -; GCN-NEXT: v_readlane_b32 s9, v31, 41 -; GCN-NEXT: v_readlane_b32 s10, v31, 42 -; GCN-NEXT: v_readlane_b32 s11, v31, 43 -; GCN-NEXT: v_readlane_b32 s12, v31, 44 -; GCN-NEXT: v_readlane_b32 s13, v31, 45 -; GCN-NEXT: v_readlane_b32 s14, v31, 46 -; GCN-NEXT: v_readlane_b32 s15, v31, 47 +; GCN-NEXT: v_readlane_b32 s0, v31, 17 +; GCN-NEXT: v_readlane_b32 s1, v31, 18 +; GCN-NEXT: v_readlane_b32 s2, v31, 19 +; GCN-NEXT: v_readlane_b32 s3, v31, 20 +; GCN-NEXT: 
v_readlane_b32 s4, v31, 21 +; GCN-NEXT: v_readlane_b32 s5, v31, 22 +; GCN-NEXT: v_readlane_b32 s6, v31, 23 +; GCN-NEXT: v_readlane_b32 s7, v31, 24 +; GCN-NEXT: v_readlane_b32 s8, v31, 25 +; GCN-NEXT: v_readlane_b32 s9, v31, 26 +; GCN-NEXT: v_readlane_b32 s10, v31, 27 +; GCN-NEXT: v_readlane_b32 s11, v31, 28 +; GCN-NEXT: v_readlane_b32 s12, v31, 29 +; GCN-NEXT: v_readlane_b32 s13, v31, 30 +; GCN-NEXT: v_readlane_b32 s14, v31, 31 +; GCN-NEXT: v_readlane_b32 s15, v31, 32 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 16 -; GCN-NEXT: v_readlane_b32 s1, v31, 17 -; GCN-NEXT: v_readlane_b32 s2, v31, 18 -; GCN-NEXT: v_readlane_b32 s3, v31, 19 -; GCN-NEXT: v_readlane_b32 s4, v31, 20 -; GCN-NEXT: v_readlane_b32 s5, v31, 21 -; GCN-NEXT: v_readlane_b32 s6, v31, 22 -; GCN-NEXT: v_readlane_b32 s7, v31, 23 -; GCN-NEXT: v_readlane_b32 s8, v31, 24 -; GCN-NEXT: v_readlane_b32 s9, v31, 25 -; GCN-NEXT: v_readlane_b32 s10, v31, 26 -; GCN-NEXT: v_readlane_b32 s11, v31, 27 -; GCN-NEXT: v_readlane_b32 s12, v31, 28 -; GCN-NEXT: v_readlane_b32 s13, v31, 29 -; GCN-NEXT: v_readlane_b32 s14, v31, 30 -; GCN-NEXT: v_readlane_b32 s15, v31, 31 +; GCN-NEXT: v_readlane_b32 s0, v31, 33 +; GCN-NEXT: v_readlane_b32 s1, v31, 34 +; GCN-NEXT: v_readlane_b32 s2, v31, 35 +; GCN-NEXT: v_readlane_b32 s3, v31, 36 +; GCN-NEXT: v_readlane_b32 s4, v31, 37 +; GCN-NEXT: v_readlane_b32 s5, v31, 38 +; GCN-NEXT: v_readlane_b32 s6, v31, 39 +; GCN-NEXT: v_readlane_b32 s7, v31, 40 +; GCN-NEXT: v_readlane_b32 s8, v31, 41 +; GCN-NEXT: v_readlane_b32 s9, v31, 42 +; GCN-NEXT: v_readlane_b32 s10, v31, 43 +; GCN-NEXT: v_readlane_b32 s11, v31, 44 +; GCN-NEXT: v_readlane_b32 s12, v31, 45 +; GCN-NEXT: v_readlane_b32 s13, v31, 46 +; GCN-NEXT: v_readlane_b32 s14, v31, 47 +; GCN-NEXT: v_readlane_b32 s15, v31, 48 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: v_readlane_b32 s0, v31, 48 -; GCN-NEXT: v_readlane_b32 s1, v31, 49 -; 
GCN-NEXT: v_readlane_b32 s2, v31, 50 -; GCN-NEXT: v_readlane_b32 s3, v31, 51 -; GCN-NEXT: v_readlane_b32 s4, v31, 52 -; GCN-NEXT: v_readlane_b32 s5, v31, 53 -; GCN-NEXT: v_readlane_b32 s6, v31, 54 -; GCN-NEXT: v_readlane_b32 s7, v31, 55 -; GCN-NEXT: v_readlane_b32 s8, v31, 56 -; GCN-NEXT: v_readlane_b32 s9, v31, 57 -; GCN-NEXT: v_readlane_b32 s10, v31, 58 -; GCN-NEXT: v_readlane_b32 s11, v31, 59 -; GCN-NEXT: v_readlane_b32 s12, v31, 60 -; GCN-NEXT: v_readlane_b32 s13, v31, 61 -; GCN-NEXT: v_readlane_b32 s14, v31, 62 -; GCN-NEXT: v_readlane_b32 s15, v31, 63 +; GCN-NEXT: s_mov_b64 s[0:1], exec +; GCN-NEXT: s_mov_b64 exec, 0xffff +; GCN-NEXT: buffer_load_dword v0, off, s[56:59], 0 offset:4 ; 4-byte Folded Reload +; GCN-NEXT: s_mov_b64 exec, s[0:1] +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_readlane_b32 s0, v0, 0 +; GCN-NEXT: v_readlane_b32 s1, v0, 1 +; GCN-NEXT: v_readlane_b32 s2, v0, 2 +; GCN-NEXT: v_readlane_b32 s3, v0, 3 +; GCN-NEXT: v_readlane_b32 s4, v0, 4 +; GCN-NEXT: v_readlane_b32 s5, v0, 5 +; GCN-NEXT: v_readlane_b32 s6, v0, 6 +; GCN-NEXT: v_readlane_b32 s7, v0, 7 +; GCN-NEXT: v_readlane_b32 s8, v0, 8 +; GCN-NEXT: v_readlane_b32 s9, v0, 9 +; GCN-NEXT: v_readlane_b32 s10, v0, 10 +; GCN-NEXT: v_readlane_b32 s11, v0, 11 +; GCN-NEXT: v_readlane_b32 s12, v0, 12 +; GCN-NEXT: v_readlane_b32 s13, v0, 13 +; GCN-NEXT: v_readlane_b32 s14, v0, 14 +; GCN-NEXT: v_readlane_b32 s15, v0, 15 ; GCN-NEXT: ;;#ASMSTART ; GCN-NEXT: ; use s[0:15] ; GCN-NEXT: ;;#ASMEND -; GCN-NEXT: s_mov_b64 s[16:17], exec -; GCN-NEXT: s_mov_b64 exec, 3 -; GCN-NEXT: buffer_load_dword v0, off, s[20:23], 0 offset:4 ; 4-byte Folded Reload -; GCN-NEXT: s_mov_b64 exec, s[16:17] -; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_readlane_b32 s16, v0, 0 -; GCN-NEXT: v_readlane_b32 s17, v0, 1 +; GCN-NEXT: v_readlane_b32 s0, v31, 49 +; GCN-NEXT: v_readlane_b32 s1, v31, 50 ; GCN-NEXT: ;;#ASMSTART -; GCN-NEXT: ; use s[16:17] +; GCN-NEXT: ; use s[0:1] ; GCN-NEXT: ;;#ASMEND ; GCN-NEXT: BB2_2: ; %ret ; GCN-NEXT: 
s_endpgm Index: llvm/test/CodeGen/AMDGPU/spill-m0.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/spill-m0.ll +++ llvm/test/CodeGen/AMDGPU/spill-m0.ll @@ -77,6 +77,101 @@ ret void } +; Force save and restore of m0 during SMEM spill +; GCN-LABEL: {{^}}m0_unavailable_spill: + +; GCN: ; def m0, 1 + +; GCN: s_mov_b32 m0, s0 +; GCN: v_interp_mov_f32 + +; GCN: ; clobber m0 + +; TOSMEM: s_mov_b32 s2, m0 +; TOSMEM: s_add_u32 m0, s3, 0x100 +; TOSMEM-NEXT: s_buffer_store_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Spill +; TOSMEM: s_mov_b32 m0, s2 + +; TOSMEM: s_mov_b64 exec, +; TOSMEM: s_cbranch_execz +; TOSMEM: s_branch + +; TOSMEM: BB{{[0-9]+_[0-9]+}}: +; TOSMEM: s_add_u32 m0, s3, 0x100 +; TOSMEM-NEXT: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, m0 ; 8-byte Folded Reload + +; GCN-NOT: v_readlane_b32 m0 +; GCN-NOT: s_buffer_store_dword m0 +; GCN-NOT: s_buffer_load_dword m0 +define amdgpu_kernel void @m0_unavailable_spill(i32 %m0.arg) #0 { +main_body: + %m0 = call i32 asm sideeffect "; def $0, 1", "={m0}"() #0 + %tmp = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %m0.arg) + call void asm sideeffect "; clobber $0", "~{m0}"() #0 + %cmp = fcmp ueq float 0.000000e+00, %tmp + br i1 %cmp, label %if, label %else + +if: ; preds = %main_body + store volatile i32 8, i32 addrspace(1)* undef + br label %endif + +else: ; preds = %main_body + store volatile i32 11, i32 addrspace(1)* undef + br label %endif + +endif: + ret void +} + +; GCN-LABEL: {{^}}restore_m0_lds: +; TOSMEM: s_load_dwordx2 [[REG:s\[[0-9]+:[0-9]+\]]] +; TOSMEM: s_cmp_eq_u32 +; FIXME: RegScavenger::isRegUsed() always returns true if m0 is reserved, so we have to save and restore it +; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_add_u32 m0, s3, 0x100 +; TOSMEM: s_buffer_store_dword s{{[0-9]+}}, s[88:91], m0 ; 4-byte Folded Spill +; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_add_u32 m0, s3, 0x200 +; TOSMEM: 
s_buffer_store_dwordx2 [[REG]], s[88:91], m0 ; 8-byte Folded Spill +; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_cbranch_scc1 + +; TOSMEM: s_mov_b32 m0, -1 + +; TOSMEM: s_mov_b32 s2, m0 +; TOSMEM: s_add_u32 m0, s3, 0x200 +; TOSMEM: s_buffer_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s[88:91], m0 ; 8-byte Folded Reload +; TOSMEM: s_mov_b32 m0, s2 +; TOSMEM: s_waitcnt lgkmcnt(0) + +; TOSMEM: ds_write_b64 + +; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_add_u32 m0, s3, 0x100 +; TOSMEM: s_buffer_load_dword s2, s[88:91], m0 ; 4-byte Folded Reload +; FIXME-TOSMEM-NOT: m0 +; TOSMEM: s_waitcnt lgkmcnt(0) +; TOSMEM-NOT: m0 +; TOSMEM: s_mov_b32 m0, s2 +; TOSMEM: ; use m0 + +; TOSMEM: s_dcache_wb +; TOSMEM: s_endpgm +define amdgpu_kernel void @restore_m0_lds(i32 %arg) { + %m0 = call i32 asm sideeffect "s_mov_b32 m0, 0", "={m0}"() #0 + %sval = load volatile i64, i64 addrspace(4)* undef + %cmp = icmp eq i32 %arg, 0 + br i1 %cmp, label %ret, label %bb + +bb: + store volatile i64 %sval, i64 addrspace(3)* undef + call void asm sideeffect "; use $0", "{m0}"(i32 %m0) #0 + br label %ret + +ret: + ret void +} + declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #1 declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #0 declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) #0 Index: llvm/test/CodeGen/AMDGPU/wwm-reserved.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/wwm-reserved.ll +++ llvm/test/CodeGen/AMDGPU/wwm-reserved.ll @@ -90,10 +90,10 @@ } define amdgpu_kernel void @call(<4 x i32> inreg %tmp14, i32 inreg %arg) { -; GFX9-O0: v_mov_b32_e32 v0, s2 +; GFX9-O0: v_mov_b32_e32 v0, s0 ; GFX9-O3: v_mov_b32_e32 v2, s0 ; GFX9-NEXT: s_not_b64 exec, exec -; GFX9-O0-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-O0-NEXT: v_mov_b32_e32 v0, s1 ; GFX9-O3-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_not_b64 exec, exec %tmp107 = tail call i32 @llvm.amdgcn.set.inactive.i32(i32 %arg, i32 0) @@ -136,8 +136,8 @@ ; GFX9-O0: 
buffer_store_dword v1 ; GFX9: s_swappc_b64 %tmp134 = call i64 @called_i64(i64 %tmp107) -; GFX9-O0: buffer_load_dword v6 -; GFX9-O0: buffer_load_dword v7 +; GFX9-O0: buffer_load_dword v4 +; GFX9-O0: buffer_load_dword v5 %tmp136 = add i64 %tmp134, %tmp107 %tmp137 = tail call i64 @llvm.amdgcn.wwm.i64(i64 %tmp136) %tmp138 = bitcast i64 %tmp137 to <2 x i32> Index: llvm/test/CodeGen/ARM/legalize-bitcast.ll =================================================================== --- llvm/test/CodeGen/ARM/legalize-bitcast.ll +++ llvm/test/CodeGen/ARM/legalize-bitcast.ll @@ -49,9 +49,9 @@ ; CHECK-NEXT: vmov.32 d16[0], r0 ; CHECK-NEXT: @ implicit-def: $q9 ; CHECK-NEXT: vmov.f64 d18, d16 -; CHECK-NEXT: vrev32.16 q9, q9 -; CHECK-NEXT: @ kill: def $d18 killed $d18 killed $q9 -; CHECK-NEXT: vmov.u16 r0, d18[0] +; CHECK-NEXT: vrev32.16 q8, q9 +; CHECK-NEXT: @ kill: def $d16 killed $d16 killed $q8 +; CHECK-NEXT: vmov.u16 r0, d16[0] ; CHECK-NEXT: bx lr %vec = bitcast i80 %in to <5 x i16> %e0 = extractelement <5 x i16> %vec, i32 0 Index: llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll =================================================================== --- llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll +++ llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/fptosi_and_fptoui.ll @@ -235,15 +235,15 @@ ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f2 -; FP32-NEXT: mtc1 $2, $f3 -; FP32-NEXT: sub.d $f4, $f12, $f2 -; FP32-NEXT: trunc.w.d $f0, $f4 -; FP32-NEXT: mfc1 $2, $f0 +; FP32-NEXT: mtc1 $3, $f0 +; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: sub.d $f2, $f12, $f0 +; FP32-NEXT: trunc.w.d $f2, $f2 +; FP32-NEXT: mfc1 $2, $f2 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f2 +; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -256,15 +256,15 @@ ; FP64-NEXT: mfc1 $1, $f0 ; 
FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f1 -; FP64-NEXT: mthc1 $2, $f1 -; FP64-NEXT: sub.d $f2, $f12, $f1 -; FP64-NEXT: trunc.w.d $f0, $f2 -; FP64-NEXT: mfc1 $2, $f0 +; FP64-NEXT: mtc1 $3, $f0 +; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: sub.d $f1, $f12, $f0 +; FP64-NEXT: trunc.w.d $f1, $f1 +; FP64-NEXT: mfc1 $2, $f1 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: c.ult.d $f12, $f1 +; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 @@ -282,15 +282,15 @@ ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; FP32-NEXT: mtc1 $3, $f2 -; FP32-NEXT: mtc1 $2, $f3 -; FP32-NEXT: sub.d $f4, $f12, $f2 -; FP32-NEXT: trunc.w.d $f0, $f4 -; FP32-NEXT: mfc1 $2, $f0 +; FP32-NEXT: mtc1 $3, $f0 +; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: sub.d $f2, $f12, $f0 +; FP32-NEXT: trunc.w.d $f2, $f2 +; FP32-NEXT: mfc1 $2, $f2 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f2 +; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -304,15 +304,15 @@ ; FP64-NEXT: mfc1 $1, $f0 ; FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f1 -; FP64-NEXT: mthc1 $2, $f1 -; FP64-NEXT: sub.d $f2, $f12, $f1 -; FP64-NEXT: trunc.w.d $f0, $f2 -; FP64-NEXT: mfc1 $2, $f0 +; FP64-NEXT: mtc1 $3, $f0 +; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: sub.d $f1, $f12, $f0 +; FP64-NEXT: trunc.w.d $f1, $f1 +; FP64-NEXT: mfc1 $2, $f1 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: c.ult.d $f12, $f1 +; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 @@ -331,15 +331,15 @@ ; FP32-NEXT: mfc1 $1, $f0 ; FP32-NEXT: lui $2, 16864 ; FP32-NEXT: ori $3, $zero, 0 -; 
FP32-NEXT: mtc1 $3, $f2 -; FP32-NEXT: mtc1 $2, $f3 -; FP32-NEXT: sub.d $f4, $f12, $f2 -; FP32-NEXT: trunc.w.d $f0, $f4 -; FP32-NEXT: mfc1 $2, $f0 +; FP32-NEXT: mtc1 $3, $f0 +; FP32-NEXT: mtc1 $2, $f1 +; FP32-NEXT: sub.d $f2, $f12, $f0 +; FP32-NEXT: trunc.w.d $f2, $f2 +; FP32-NEXT: mfc1 $2, $f2 ; FP32-NEXT: lui $3, 32768 ; FP32-NEXT: xor $2, $2, $3 ; FP32-NEXT: addiu $3, $zero, 1 -; FP32-NEXT: c.ult.d $f12, $f2 +; FP32-NEXT: c.ult.d $f12, $f0 ; FP32-NEXT: movf $3, $zero, $fcc0 ; FP32-NEXT: andi $3, $3, 1 ; FP32-NEXT: movn $2, $1, $3 @@ -353,15 +353,15 @@ ; FP64-NEXT: mfc1 $1, $f0 ; FP64-NEXT: lui $2, 16864 ; FP64-NEXT: ori $3, $zero, 0 -; FP64-NEXT: mtc1 $3, $f1 -; FP64-NEXT: mthc1 $2, $f1 -; FP64-NEXT: sub.d $f2, $f12, $f1 -; FP64-NEXT: trunc.w.d $f0, $f2 -; FP64-NEXT: mfc1 $2, $f0 +; FP64-NEXT: mtc1 $3, $f0 +; FP64-NEXT: mthc1 $2, $f0 +; FP64-NEXT: sub.d $f1, $f12, $f0 +; FP64-NEXT: trunc.w.d $f1, $f1 +; FP64-NEXT: mfc1 $2, $f1 ; FP64-NEXT: lui $3, 32768 ; FP64-NEXT: xor $2, $2, $3 ; FP64-NEXT: addiu $3, $zero, 1 -; FP64-NEXT: c.ult.d $f12, $f1 +; FP64-NEXT: c.ult.d $f12, $f0 ; FP64-NEXT: movf $3, $zero, $fcc0 ; FP64-NEXT: andi $3, $3, 1 ; FP64-NEXT: movn $2, $1, $3 Index: llvm/test/CodeGen/Mips/atomic-min-max.ll =================================================================== --- llvm/test/CodeGen/Mips/atomic-min-max.ll +++ llvm/test/CodeGen/Mips/atomic-min-max.ll @@ -1154,26 +1154,26 @@ ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB4_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: slt $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movn $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB4_1 +; MIPS64-NEXT: ll $7, 
0($1) +; MIPS64-NEXT: slt $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movn $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB4_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1194,26 +1194,26 @@ ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB4_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: slt $11, $8, $5 -; MIPS64R6-NEXT: seleqz $9, $8, $11 -; MIPS64R6-NEXT: selnez $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB4_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: slt $10, $7, $5 +; MIPS64R6-NEXT: seleqz $8, $7, $10 +; MIPS64R6-NEXT: selnez $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB4_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; 
MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1232,28 +1232,28 @@ ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB4_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movn $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB4_1 +; MIPS64EL-NEXT: slt $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB4_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1273,28 +1273,28 @@ ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: 
.LBB4_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $11, $8, $5 -; MIPS64ELR6-NEXT: seleqz $9, $8, $11 -; MIPS64ELR6-NEXT: selnez $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB4_1 +; MIPS64ELR6-NEXT: slt $10, $7, $5 +; MIPS64ELR6-NEXT: seleqz $8, $7, $10 +; MIPS64ELR6-NEXT: selnez $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB4_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1635,26 +1635,26 @@ ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB5_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: slt $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movz $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB5_1 +; 
MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: slt $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movz $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB5_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1675,26 +1675,26 @@ ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB5_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: slt $11, $8, $5 -; MIPS64R6-NEXT: selnez $9, $8, $11 -; MIPS64R6-NEXT: seleqz $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB5_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: slt $10, $7, $5 +; MIPS64R6-NEXT: selnez $8, $7, $10 +; MIPS64R6-NEXT: seleqz $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB5_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # 
%bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1713,28 +1713,28 @@ ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB5_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movz $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB5_1 +; MIPS64EL-NEXT: slt $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB5_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -1754,28 +1754,28 @@ ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; 
MIPS64ELR6-NEXT: .LBB5_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $11, $8, $5 -; MIPS64ELR6-NEXT: selnez $9, $8, $11 -; MIPS64ELR6-NEXT: seleqz $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB5_1 +; MIPS64ELR6-NEXT: slt $10, $7, $5 +; MIPS64ELR6-NEXT: selnez $8, $7, $10 +; MIPS64ELR6-NEXT: seleqz $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB5_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2116,26 +2116,26 @@ ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB6_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: sltu $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movn $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz 
$10, .LBB6_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: sltu $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movn $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB6_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2156,26 +2156,26 @@ ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB6_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: sltu $11, $8, $5 -; MIPS64R6-NEXT: seleqz $9, $8, $11 -; MIPS64R6-NEXT: selnez $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB6_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: sltu $10, $7, $5 +; MIPS64R6-NEXT: seleqz $8, $7, $10 +; MIPS64R6-NEXT: selnez $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB6_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; 
MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2194,28 +2194,28 @@ ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB6_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movn $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB6_1 +; MIPS64EL-NEXT: sltu $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB6_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2235,28 +2235,28 @@ ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: 
sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB6_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $11, $8, $5 -; MIPS64ELR6-NEXT: seleqz $9, $8, $11 -; MIPS64ELR6-NEXT: selnez $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB6_1 +; MIPS64ELR6-NEXT: sltu $10, $7, $5 +; MIPS64ELR6-NEXT: seleqz $8, $7, $10 +; MIPS64ELR6-NEXT: selnez $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB6_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2597,26 +2597,26 @@ ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 65535 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB7_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: sltu $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movz $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; 
MIPS64-NEXT: beqz $10, .LBB7_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: sltu $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movz $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB7_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2637,26 +2637,26 @@ ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 65535 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB7_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: sltu $11, $8, $5 -; MIPS64R6-NEXT: selnez $9, $8, $11 -; MIPS64R6-NEXT: seleqz $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB7_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: sltu $10, $7, $5 +; MIPS64R6-NEXT: selnez $8, $7, $10 +; MIPS64R6-NEXT: seleqz $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB7_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; 
MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2675,28 +2675,28 @@ ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 65535 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB7_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movz $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB7_1 +; MIPS64EL-NEXT: sltu $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB7_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -2716,28 +2716,28 @@ ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 65535 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, 
$zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB7_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $11, $8, $5 -; MIPS64ELR6-NEXT: selnez $9, $8, $11 -; MIPS64ELR6-NEXT: seleqz $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB7_1 +; MIPS64ELR6-NEXT: sltu $10, $7, $5 +; MIPS64ELR6-NEXT: selnez $8, $7, $10 +; MIPS64ELR6-NEXT: seleqz $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB7_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3079,26 +3079,26 @@ ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB8_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: slt $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movn $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; 
MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB8_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: slt $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movn $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB8_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3119,26 +3119,26 @@ ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB8_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: slt $11, $8, $5 -; MIPS64R6-NEXT: seleqz $9, $8, $11 -; MIPS64R6-NEXT: selnez $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB8_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: slt $10, $7, $5 +; MIPS64R6-NEXT: seleqz $8, $7, $10 +; MIPS64R6-NEXT: selnez $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB8_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv 
$6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3157,28 +3157,28 @@ ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB8_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movn $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB8_1 +; MIPS64EL-NEXT: slt $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB8_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3198,28 +3198,28 @@ ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: 
nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB8_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $11, $8, $5 -; MIPS64ELR6-NEXT: seleqz $9, $8, $11 -; MIPS64ELR6-NEXT: selnez $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB8_1 +; MIPS64ELR6-NEXT: slt $10, $7, $5 +; MIPS64ELR6-NEXT: seleqz $8, $7, $10 +; MIPS64ELR6-NEXT: selnez $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB8_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3560,26 +3560,26 @@ ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB9_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: slt $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movz $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 
-; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB9_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: slt $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movz $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB9_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3600,26 +3600,26 @@ ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB9_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: slt $11, $8, $5 -; MIPS64R6-NEXT: selnez $9, $8, $11 -; MIPS64R6-NEXT: seleqz $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB9_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: slt $10, $7, $5 +; MIPS64R6-NEXT: selnez $8, $7, $10 +; MIPS64R6-NEXT: seleqz $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB9_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: 
srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3638,28 +3638,28 @@ ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB9_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: slt $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movz $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB9_1 +; MIPS64EL-NEXT: slt $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB9_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -3679,28 +3679,28 @@ ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; 
MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB9_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: slt $11, $8, $5 -; MIPS64ELR6-NEXT: selnez $9, $8, $11 -; MIPS64ELR6-NEXT: seleqz $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB9_1 +; MIPS64ELR6-NEXT: slt $10, $7, $5 +; MIPS64ELR6-NEXT: selnez $8, $7, $10 +; MIPS64ELR6-NEXT: seleqz $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB9_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4041,26 +4041,26 @@ ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB10_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: sltu $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movn $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 -; MIPS64-NEXT: and $10, $8, $6 -; 
MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB10_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: sltu $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movn $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB10_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4081,26 +4081,26 @@ ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB10_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: sltu $11, $8, $5 -; MIPS64R6-NEXT: seleqz $9, $8, $11 -; MIPS64R6-NEXT: selnez $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB10_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: sltu $10, $7, $5 +; MIPS64R6-NEXT: seleqz $8, $7, $10 +; MIPS64R6-NEXT: selnez $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB10_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; MIPS64R6-NEXT: seh $7, $7 +; 
MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4119,28 +4119,28 @@ ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB10_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movn $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB10_1 +; MIPS64EL-NEXT: sltu $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movn $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB10_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4160,28 +4160,28 @@ ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; MIPS64ELR6-NEXT: sllv $3, $3, $2 
-; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB10_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $11, $8, $5 -; MIPS64ELR6-NEXT: seleqz $9, $8, $11 -; MIPS64ELR6-NEXT: selnez $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB10_1 +; MIPS64ELR6-NEXT: sltu $10, $7, $5 +; MIPS64ELR6-NEXT: seleqz $8, $7, $10 +; MIPS64ELR6-NEXT: selnez $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB10_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4522,26 +4522,26 @@ ; MIPS64-NEXT: sll $2, $2, 3 ; MIPS64-NEXT: ori $3, $zero, 255 ; MIPS64-NEXT: sllv $3, $3, $2 -; MIPS64-NEXT: nor $6, $zero, $3 +; MIPS64-NEXT: nor $4, $zero, $3 ; MIPS64-NEXT: sllv $5, $5, $2 ; MIPS64-NEXT: .LBB11_1: # %entry ; MIPS64-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64-NEXT: ll $8, 0($1) -; MIPS64-NEXT: sltu $11, $8, $5 -; MIPS64-NEXT: move $9, $8 -; MIPS64-NEXT: movz $9, $5, $11 -; MIPS64-NEXT: and $9, $9, $3 
-; MIPS64-NEXT: and $10, $8, $6 -; MIPS64-NEXT: or $10, $10, $9 -; MIPS64-NEXT: sc $10, 0($1) -; MIPS64-NEXT: beqz $10, .LBB11_1 +; MIPS64-NEXT: ll $7, 0($1) +; MIPS64-NEXT: sltu $10, $7, $5 +; MIPS64-NEXT: move $8, $7 +; MIPS64-NEXT: movz $8, $5, $10 +; MIPS64-NEXT: and $8, $8, $3 +; MIPS64-NEXT: and $9, $7, $4 +; MIPS64-NEXT: or $9, $9, $8 +; MIPS64-NEXT: sc $9, 0($1) +; MIPS64-NEXT: beqz $9, .LBB11_1 ; MIPS64-NEXT: nop ; MIPS64-NEXT: # %bb.2: # %entry -; MIPS64-NEXT: and $7, $8, $3 -; MIPS64-NEXT: srlv $7, $7, $2 -; MIPS64-NEXT: seh $7, $7 +; MIPS64-NEXT: and $6, $7, $3 +; MIPS64-NEXT: srlv $6, $6, $2 +; MIPS64-NEXT: seh $6, $6 ; MIPS64-NEXT: # %bb.3: # %entry -; MIPS64-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64-NEXT: # %bb.4: # %entry ; MIPS64-NEXT: sync ; MIPS64-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4562,26 +4562,26 @@ ; MIPS64R6-NEXT: sll $2, $2, 3 ; MIPS64R6-NEXT: ori $3, $zero, 255 ; MIPS64R6-NEXT: sllv $3, $3, $2 -; MIPS64R6-NEXT: nor $6, $zero, $3 +; MIPS64R6-NEXT: nor $4, $zero, $3 ; MIPS64R6-NEXT: sllv $5, $5, $2 ; MIPS64R6-NEXT: .LBB11_1: # %entry ; MIPS64R6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6-NEXT: ll $8, 0($1) -; MIPS64R6-NEXT: sltu $11, $8, $5 -; MIPS64R6-NEXT: selnez $9, $8, $11 -; MIPS64R6-NEXT: seleqz $11, $5, $11 -; MIPS64R6-NEXT: or $9, $9, $11 -; MIPS64R6-NEXT: and $9, $9, $3 -; MIPS64R6-NEXT: and $10, $8, $6 -; MIPS64R6-NEXT: or $10, $10, $9 -; MIPS64R6-NEXT: sc $10, 0($1) -; MIPS64R6-NEXT: beqzc $10, .LBB11_1 +; MIPS64R6-NEXT: ll $7, 0($1) +; MIPS64R6-NEXT: sltu $10, $7, $5 +; MIPS64R6-NEXT: selnez $8, $7, $10 +; MIPS64R6-NEXT: seleqz $10, $5, $10 +; MIPS64R6-NEXT: or $8, $8, $10 +; MIPS64R6-NEXT: and $8, $8, $3 +; MIPS64R6-NEXT: and $9, $7, $4 +; MIPS64R6-NEXT: or $9, $9, $8 +; MIPS64R6-NEXT: sc $9, 0($1) +; MIPS64R6-NEXT: beqzc $9, .LBB11_1 ; MIPS64R6-NEXT: # %bb.2: # %entry -; MIPS64R6-NEXT: and $7, $8, $3 -; MIPS64R6-NEXT: srlv $7, $7, $2 -; 
MIPS64R6-NEXT: seh $7, $7 +; MIPS64R6-NEXT: and $6, $7, $3 +; MIPS64R6-NEXT: srlv $6, $6, $2 +; MIPS64R6-NEXT: seh $6, $6 ; MIPS64R6-NEXT: # %bb.3: # %entry -; MIPS64R6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6-NEXT: # %bb.4: # %entry ; MIPS64R6-NEXT: sync ; MIPS64R6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4600,28 +4600,28 @@ ; MIPS64EL-NEXT: sll $2, $2, 3 ; MIPS64EL-NEXT: ori $3, $zero, 255 ; MIPS64EL-NEXT: sllv $3, $3, $2 -; MIPS64EL-NEXT: nor $6, $zero, $3 +; MIPS64EL-NEXT: nor $4, $zero, $3 ; MIPS64EL-NEXT: sllv $5, $5, $2 ; MIPS64EL-NEXT: .LBB11_1: # %entry ; MIPS64EL-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64EL-NEXT: ll $8, 0($1) -; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: ll $7, 0($1) +; MIPS64EL-NEXT: and $7, $7, $3 ; MIPS64EL-NEXT: and $5, $5, $3 -; MIPS64EL-NEXT: sltu $11, $8, $5 -; MIPS64EL-NEXT: move $9, $8 -; MIPS64EL-NEXT: movz $9, $5, $11 -; MIPS64EL-NEXT: and $9, $9, $3 -; MIPS64EL-NEXT: and $10, $8, $6 -; MIPS64EL-NEXT: or $10, $10, $9 -; MIPS64EL-NEXT: sc $10, 0($1) -; MIPS64EL-NEXT: beqz $10, .LBB11_1 +; MIPS64EL-NEXT: sltu $10, $7, $5 +; MIPS64EL-NEXT: move $8, $7 +; MIPS64EL-NEXT: movz $8, $5, $10 +; MIPS64EL-NEXT: and $8, $8, $3 +; MIPS64EL-NEXT: and $9, $7, $4 +; MIPS64EL-NEXT: or $9, $9, $8 +; MIPS64EL-NEXT: sc $9, 0($1) +; MIPS64EL-NEXT: beqz $9, .LBB11_1 ; MIPS64EL-NEXT: nop ; MIPS64EL-NEXT: # %bb.2: # %entry -; MIPS64EL-NEXT: and $7, $8, $3 -; MIPS64EL-NEXT: srlv $7, $7, $2 -; MIPS64EL-NEXT: seh $7, $7 +; MIPS64EL-NEXT: and $6, $7, $3 +; MIPS64EL-NEXT: srlv $6, $6, $2 +; MIPS64EL-NEXT: seh $6, $6 ; MIPS64EL-NEXT: # %bb.3: # %entry -; MIPS64EL-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64EL-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64EL-NEXT: # %bb.4: # %entry ; MIPS64EL-NEXT: sync ; MIPS64EL-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -4641,28 +4641,28 @@ ; MIPS64ELR6-NEXT: sll $2, $2, 3 ; MIPS64ELR6-NEXT: ori $3, $zero, 255 ; 
MIPS64ELR6-NEXT: sllv $3, $3, $2 -; MIPS64ELR6-NEXT: nor $6, $zero, $3 +; MIPS64ELR6-NEXT: nor $4, $zero, $3 ; MIPS64ELR6-NEXT: sllv $5, $5, $2 ; MIPS64ELR6-NEXT: .LBB11_1: # %entry ; MIPS64ELR6-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64ELR6-NEXT: ll $8, 0($1) -; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: ll $7, 0($1) +; MIPS64ELR6-NEXT: and $7, $7, $3 ; MIPS64ELR6-NEXT: and $5, $5, $3 -; MIPS64ELR6-NEXT: sltu $11, $8, $5 -; MIPS64ELR6-NEXT: selnez $9, $8, $11 -; MIPS64ELR6-NEXT: seleqz $11, $5, $11 -; MIPS64ELR6-NEXT: or $9, $9, $11 -; MIPS64ELR6-NEXT: and $9, $9, $3 -; MIPS64ELR6-NEXT: and $10, $8, $6 -; MIPS64ELR6-NEXT: or $10, $10, $9 -; MIPS64ELR6-NEXT: sc $10, 0($1) -; MIPS64ELR6-NEXT: beqzc $10, .LBB11_1 +; MIPS64ELR6-NEXT: sltu $10, $7, $5 +; MIPS64ELR6-NEXT: selnez $8, $7, $10 +; MIPS64ELR6-NEXT: seleqz $10, $5, $10 +; MIPS64ELR6-NEXT: or $8, $8, $10 +; MIPS64ELR6-NEXT: and $8, $8, $3 +; MIPS64ELR6-NEXT: and $9, $7, $4 +; MIPS64ELR6-NEXT: or $9, $9, $8 +; MIPS64ELR6-NEXT: sc $9, 0($1) +; MIPS64ELR6-NEXT: beqzc $9, .LBB11_1 ; MIPS64ELR6-NEXT: # %bb.2: # %entry -; MIPS64ELR6-NEXT: and $7, $8, $3 -; MIPS64ELR6-NEXT: srlv $7, $7, $2 -; MIPS64ELR6-NEXT: seh $7, $7 +; MIPS64ELR6-NEXT: and $6, $7, $3 +; MIPS64ELR6-NEXT: srlv $6, $6, $2 +; MIPS64ELR6-NEXT: seh $6, $6 ; MIPS64ELR6-NEXT: # %bb.3: # %entry -; MIPS64ELR6-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64ELR6-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64ELR6-NEXT: # %bb.4: # %entry ; MIPS64ELR6-NEXT: sync ; MIPS64ELR6-NEXT: lw $2, 12($sp) # 4-byte Folded Reload Index: llvm/test/CodeGen/Mips/atomic.ll =================================================================== --- llvm/test/CodeGen/Mips/atomic.ll +++ llvm/test/CodeGen/Mips/atomic.ll @@ -2559,28 +2559,28 @@ ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, 
$3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 3 +; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $5, $zero, $3 +; MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: .LBB8_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: addu $9, $8, $4 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($2) -; MIPS64R6O0-NEXT: beqzc $10, .LBB8_1 +; MIPS64R6O0-NEXT: ll $7, 0($2) +; MIPS64R6O0-NEXT: addu $8, $7, $4 +; MIPS64R6O0-NEXT: and $8, $8, $3 +; MIPS64R6O0-NEXT: and $9, $7, $5 +; MIPS64R6O0-NEXT: or $9, $9, $8 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB8_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $6, $7, $3 +; MIPS64R6O0-NEXT: srlv $6, $6, $1 +; MIPS64R6O0-NEXT: seb $6, $6 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3075,28 +3075,28 @@ ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 3 +; MIPS64R6O0-NEXT: sll 
$1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $5, $zero, $3 +; MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: .LBB9_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: subu $9, $8, $4 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($2) -; MIPS64R6O0-NEXT: beqzc $10, .LBB9_1 +; MIPS64R6O0-NEXT: ll $7, 0($2) +; MIPS64R6O0-NEXT: subu $8, $7, $4 +; MIPS64R6O0-NEXT: and $8, $8, $3 +; MIPS64R6O0-NEXT: and $9, $7, $5 +; MIPS64R6O0-NEXT: or $9, $9, $8 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB9_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $6, $7, $3 +; MIPS64R6O0-NEXT: srlv $6, $6, $1 +; MIPS64R6O0-NEXT: seb $6, $6 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -3601,29 +3601,29 @@ ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 3 +; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $5, $zero, $3 +; MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: .LBB10_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 
-; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: and $9, $8, $4 -; MIPS64R6O0-NEXT: nor $9, $zero, $9 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($2) -; MIPS64R6O0-NEXT: beqzc $10, .LBB10_1 +; MIPS64R6O0-NEXT: ll $7, 0($2) +; MIPS64R6O0-NEXT: and $8, $7, $4 +; MIPS64R6O0-NEXT: nor $8, $zero, $8 +; MIPS64R6O0-NEXT: and $8, $8, $3 +; MIPS64R6O0-NEXT: and $9, $7, $5 +; MIPS64R6O0-NEXT: or $9, $9, $8 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB10_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $6, $7, $3 +; MIPS64R6O0-NEXT: srlv $6, $6, $1 +; MIPS64R6O0-NEXT: seb $6, $6 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -4115,27 +4115,27 @@ ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 3 +; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $5, $zero, $3 +; MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: .LBB11_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: and $9, $4, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($2) 
-; MIPS64R6O0-NEXT: beqzc $10, .LBB11_1 +; MIPS64R6O0-NEXT: ll $7, 0($2) +; MIPS64R6O0-NEXT: and $8, $4, $3 +; MIPS64R6O0-NEXT: and $9, $7, $5 +; MIPS64R6O0-NEXT: or $9, $9, $8 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB11_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seb $7, $7 +; MIPS64R6O0-NEXT: and $6, $7, $3 +; MIPS64R6O0-NEXT: srlv $6, $6, $1 +; MIPS64R6O0-NEXT: seb $6, $6 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seb $2, $1 @@ -4666,32 +4666,32 @@ ; MIPS64R6O0-NEXT: ld $1, %got_disp(y)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 3 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $6, $zero, 255 -; MIPS64R6O0-NEXT: sllv $6, $6, $3 -; MIPS64R6O0-NEXT: nor $7, $zero, $6 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 3 +; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 255 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $6, $zero, $3 ; MIPS64R6O0-NEXT: andi $4, $4, 255 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: andi $5, $5, 255 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: sllv $5, $5, $1 ; MIPS64R6O0-NEXT: .LBB12_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $9, 0($2) -; MIPS64R6O0-NEXT: and $10, $9, $6 -; MIPS64R6O0-NEXT: bnec $10, $4, .LBB12_3 +; MIPS64R6O0-NEXT: ll $8, 0($2) +; MIPS64R6O0-NEXT: and $9, $8, $3 +; MIPS64R6O0-NEXT: bnec $9, $4, .LBB12_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB12_1 Depth=1 -; MIPS64R6O0-NEXT: and $9, $9, $7 -; 
MIPS64R6O0-NEXT: or $9, $9, $5 -; MIPS64R6O0-NEXT: sc $9, 0($2) -; MIPS64R6O0-NEXT: beqzc $9, .LBB12_1 +; MIPS64R6O0-NEXT: and $8, $8, $6 +; MIPS64R6O0-NEXT: or $8, $8, $5 +; MIPS64R6O0-NEXT: sc $8, 0($2) +; MIPS64R6O0-NEXT: beqzc $8, .LBB12_1 ; MIPS64R6O0-NEXT: .LBB12_3: # %entry -; MIPS64R6O0-NEXT: srlv $8, $10, $3 -; MIPS64R6O0-NEXT: seb $8, $8 +; MIPS64R6O0-NEXT: srlv $7, $9, $1 +; MIPS64R6O0-NEXT: seb $7, $7 ; MIPS64R6O0-NEXT: # %bb.4: # %entry -; MIPS64R6O0-NEXT: sw $8, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: daddiu $sp, $sp, 16 @@ -5236,28 +5236,28 @@ ; MIPS64R6O0-NEXT: sll $2, $2, 3 ; MIPS64R6O0-NEXT: ori $3, $zero, 255 ; MIPS64R6O0-NEXT: sllv $3, $3, $2 -; MIPS64R6O0-NEXT: nor $7, $zero, $3 -; MIPS64R6O0-NEXT: andi $8, $5, 255 -; MIPS64R6O0-NEXT: sllv $8, $8, $2 +; MIPS64R6O0-NEXT: nor $4, $zero, $3 +; MIPS64R6O0-NEXT: andi $7, $5, 255 +; MIPS64R6O0-NEXT: sllv $7, $7, $2 ; MIPS64R6O0-NEXT: andi $6, $6, 255 ; MIPS64R6O0-NEXT: sllv $6, $6, $2 ; MIPS64R6O0-NEXT: .LBB13_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $10, 0($1) -; MIPS64R6O0-NEXT: and $11, $10, $3 -; MIPS64R6O0-NEXT: bnec $11, $8, .LBB13_3 +; MIPS64R6O0-NEXT: ll $9, 0($1) +; MIPS64R6O0-NEXT: and $10, $9, $3 +; MIPS64R6O0-NEXT: bnec $10, $7, .LBB13_3 ; MIPS64R6O0-NEXT: # %bb.2: # %entry ; MIPS64R6O0-NEXT: # in Loop: Header=BB13_1 Depth=1 -; MIPS64R6O0-NEXT: and $10, $10, $7 -; MIPS64R6O0-NEXT: or $10, $10, $6 -; MIPS64R6O0-NEXT: sc $10, 0($1) -; MIPS64R6O0-NEXT: beqzc $10, .LBB13_1 +; MIPS64R6O0-NEXT: and $9, $9, $4 +; MIPS64R6O0-NEXT: or $9, $9, $6 +; MIPS64R6O0-NEXT: sc $9, 0($1) +; MIPS64R6O0-NEXT: beqzc $9, .LBB13_1 ; MIPS64R6O0-NEXT: .LBB13_3: # %entry -; MIPS64R6O0-NEXT: srlv $9, $11, $2 -; MIPS64R6O0-NEXT: seb $9, $9 +; MIPS64R6O0-NEXT: srlv $8, $10, $2 +; MIPS64R6O0-NEXT: seb $8, $8 ; 
MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: # %entry ; MIPS64R6O0-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: lw $2, 12($sp) # 4-byte Folded Reload @@ -5775,28 +5775,28 @@ ; MIPS64R6O0-NEXT: ld $1, %got_disp(z)($1) ; MIPS64R6O0-NEXT: daddiu $2, $zero, -4 ; MIPS64R6O0-NEXT: and $2, $1, $2 -; MIPS64R6O0-NEXT: andi $3, $1, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 2 -; MIPS64R6O0-NEXT: sll $3, $3, 3 -; MIPS64R6O0-NEXT: ori $5, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 -; MIPS64R6O0-NEXT: nor $6, $zero, $5 -; MIPS64R6O0-NEXT: sllv $4, $4, $3 +; MIPS64R6O0-NEXT: andi $1, $1, 3 +; MIPS64R6O0-NEXT: xori $1, $1, 2 +; MIPS64R6O0-NEXT: sll $1, $1, 3 +; MIPS64R6O0-NEXT: ori $3, $zero, 65535 +; MIPS64R6O0-NEXT: sllv $3, $3, $1 +; MIPS64R6O0-NEXT: nor $5, $zero, $3 +; MIPS64R6O0-NEXT: sllv $4, $4, $1 ; MIPS64R6O0-NEXT: .LBB14_1: # %entry ; MIPS64R6O0-NEXT: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $8, 0($2) -; MIPS64R6O0-NEXT: addu $9, $8, $4 -; MIPS64R6O0-NEXT: and $9, $9, $5 -; MIPS64R6O0-NEXT: and $10, $8, $6 -; MIPS64R6O0-NEXT: or $10, $10, $9 -; MIPS64R6O0-NEXT: sc $10, 0($2) -; MIPS64R6O0-NEXT: beqzc $10, .LBB14_1 +; MIPS64R6O0-NEXT: ll $7, 0($2) +; MIPS64R6O0-NEXT: addu $8, $7, $4 +; MIPS64R6O0-NEXT: and $8, $8, $3 +; MIPS64R6O0-NEXT: and $9, $7, $5 +; MIPS64R6O0-NEXT: or $9, $9, $8 +; MIPS64R6O0-NEXT: sc $9, 0($2) +; MIPS64R6O0-NEXT: beqzc $9, .LBB14_1 ; MIPS64R6O0-NEXT: # %bb.2: # %entry -; MIPS64R6O0-NEXT: and $7, $8, $5 -; MIPS64R6O0-NEXT: srlv $7, $7, $3 -; MIPS64R6O0-NEXT: seh $7, $7 +; MIPS64R6O0-NEXT: and $6, $7, $3 +; MIPS64R6O0-NEXT: srlv $6, $6, $1 +; MIPS64R6O0-NEXT: seh $6, $6 ; MIPS64R6O0-NEXT: # %bb.3: # %entry -; MIPS64R6O0-NEXT: sw $7, 12($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $6, 12($sp) # 4-byte Folded Spill ; 
MIPS64R6O0-NEXT: # %bb.4: # %entry ; MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 @@ -6359,33 +6359,33 @@ ; MIPS64R6O0-NEXT: sll $3, $5, 0 ; MIPS64R6O0-NEXT: addu $2, $3, $2 ; MIPS64R6O0-NEXT: sync -; MIPS64R6O0-NEXT: daddiu $8, $zero, -4 -; MIPS64R6O0-NEXT: and $8, $4, $8 -; MIPS64R6O0-NEXT: andi $3, $4, 3 -; MIPS64R6O0-NEXT: xori $3, $3, 2 -; MIPS64R6O0-NEXT: sll $3, $3, 3 +; MIPS64R6O0-NEXT: daddiu $3, $zero, -4 +; MIPS64R6O0-NEXT: and $3, $4, $3 +; MIPS64R6O0-NEXT: andi $4, $4, 3 +; MIPS64R6O0-NEXT: xori $4, $4, 2 +; MIPS64R6O0-NEXT: sll $4, $4, 3 ; MIPS64R6O0-NEXT: ori $5, $zero, 65535 -; MIPS64R6O0-NEXT: sllv $5, $5, $3 +; MIPS64R6O0-NEXT: sllv $5, $5, $4 ; MIPS64R6O0-NEXT: nor $6, $zero, $5 ; MIPS64R6O0-NEXT: andi $7, $2, 65535 -; MIPS64R6O0-NEXT: sllv $7, $7, $3 +; MIPS64R6O0-NEXT: sllv $7, $7, $4 ; MIPS64R6O0-NEXT: andi $1, $1, 65535 -; MIPS64R6O0-NEXT: sllv $1, $1, $3 +; MIPS64R6O0-NEXT: sllv $1, $1, $4 ; MIPS64R6O0-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1 -; MIPS64R6O0-NEXT: ll $10, 0($8) -; MIPS64R6O0-NEXT: and $11, $10, $5 -; MIPS64R6O0-NEXT: bnec $11, $7, .LBB15_3 +; MIPS64R6O0-NEXT: ll $9, 0($3) +; MIPS64R6O0-NEXT: and $10, $9, $5 +; MIPS64R6O0-NEXT: bnec $10, $7, .LBB15_3 ; MIPS64R6O0-NEXT: # %bb.2: # in Loop: Header=BB15_1 Depth=1 -; MIPS64R6O0-NEXT: and $10, $10, $6 -; MIPS64R6O0-NEXT: or $10, $10, $1 -; MIPS64R6O0-NEXT: sc $10, 0($8) -; MIPS64R6O0-NEXT: beqzc $10, .LBB15_1 +; MIPS64R6O0-NEXT: and $9, $9, $6 +; MIPS64R6O0-NEXT: or $9, $9, $1 +; MIPS64R6O0-NEXT: sc $9, 0($3) +; MIPS64R6O0-NEXT: beqzc $9, .LBB15_1 ; MIPS64R6O0-NEXT: .LBB15_3: -; MIPS64R6O0-NEXT: srlv $9, $11, $3 -; MIPS64R6O0-NEXT: seh $9, $9 +; MIPS64R6O0-NEXT: srlv $8, $10, $4 +; MIPS64R6O0-NEXT: seh $8, $8 ; MIPS64R6O0-NEXT: # %bb.4: ; MIPS64R6O0-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MIPS64R6O0-NEXT: sw $9, 8($sp) # 4-byte Folded Spill +; MIPS64R6O0-NEXT: sw $8, 8($sp) # 4-byte Folded Spill ; MIPS64R6O0-NEXT: # %bb.5: ; 
MIPS64R6O0-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS64R6O0-NEXT: seh $2, $1 @@ -7145,8 +7145,8 @@ ; MIPS64R6O0-NEXT: sc $6, 0($1) ; MIPS64R6O0-NEXT: beqzc $6, .LBB17_1 ; MIPS64R6O0-NEXT: .LBB17_3: # %entry -; MIPS64R6O0-NEXT: xor $2, $5, $3 -; MIPS64R6O0-NEXT: sltiu $2, $2, 1 +; MIPS64R6O0-NEXT: xor $1, $5, $3 +; MIPS64R6O0-NEXT: sltiu $2, $1, 1 ; MIPS64R6O0-NEXT: sync ; MIPS64R6O0-NEXT: jrc $ra ; Index: llvm/test/CodeGen/Mips/implicit-sret.ll =================================================================== --- llvm/test/CodeGen/Mips/implicit-sret.ll +++ llvm/test/CodeGen/Mips/implicit-sret.ll @@ -48,8 +48,8 @@ ; CHECK-NEXT: sd $zero, 8($4) ; CHECK-NEXT: daddiu $3, $zero, 30 ; CHECK-NEXT: sd $3, 24($4) -; CHECK-NEXT: addiu $5, $zero, 10 -; CHECK-NEXT: sw $5, 0($4) +; CHECK-NEXT: addiu $3, $zero, 10 +; CHECK-NEXT: sw $3, 0($4) ; CHECK-NEXT: jr $ra ; CHECK-NEXT: nop ret { i32, i128, i64 } { i32 10, i128 20, i64 30 } @@ -70,10 +70,12 @@ ; CHECK-NEXT: lw $3, 4($sp) ; CHECK-NEXT: # implicit-def: $a0_64 ; CHECK-NEXT: move $4, $3 -; CHECK-NEXT: # implicit-def: $a1_64 -; CHECK-NEXT: move $5, $2 -; CHECK-NEXT: # implicit-def: $a2_64 -; CHECK-NEXT: move $6, $1 +; CHECK-NEXT: # implicit-def: $v1_64 +; CHECK-NEXT: move $3, $2 +; CHECK-NEXT: # implicit-def: $v0_64 +; CHECK-NEXT: move $2, $1 +; CHECK-NEXT: move $5, $3 +; CHECK-NEXT: move $6, $2 ; CHECK-NEXT: jal use_sret2 ; CHECK-NEXT: nop ; CHECK-NEXT: ld $ra, 24($sp) # 8-byte Folded Reload Index: llvm/test/CodeGen/PowerPC/addegluecrash.ll =================================================================== --- llvm/test/CodeGen/PowerPC/addegluecrash.ll +++ llvm/test/CodeGen/PowerPC/addegluecrash.ll @@ -21,11 +21,11 @@ ; CHECK-NEXT: addze 5, 5 ; CHECK-NEXT: add 4, 5, 4 ; CHECK-NEXT: cmpld 7, 4, 5 -; CHECK-NEXT: mfocrf 10, 1 -; CHECK-NEXT: rlwinm 10, 10, 29, 31, 31 -; CHECK-NEXT: # implicit-def: $x4 -; CHECK-NEXT: mr 4, 10 -; CHECK-NEXT: clrldi 4, 4, 32 +; CHECK-NEXT: mfocrf 4, 1 +; CHECK-NEXT: rlwinm 4, 4, 29, 31, 31 
+; CHECK-NEXT: # implicit-def: $x5 +; CHECK-NEXT: mr 5, 4 +; CHECK-NEXT: clrldi 4, 5, 32 ; CHECK-NEXT: std 4, 0(3) ; CHECK-NEXT: blr %1 = load i64, i64* %a, align 8 Index: llvm/test/CodeGen/PowerPC/vsx.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vsx.ll +++ llvm/test/CodeGen/PowerPC/vsx.ll @@ -1548,8 +1548,8 @@ ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs1, vs1 +; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 +; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test46: @@ -1616,8 +1616,8 @@ ; CHECK-FISL-NEXT: ld r3, -24(r1) ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 -; CHECK-FISL-NEXT: lxvd2x vs1, 0, r3 -; CHECK-FISL-NEXT: xxlor v2, vs1, vs1 +; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 +; CHECK-FISL-NEXT: xxlor v2, vs0, vs0 ; CHECK-FISL-NEXT: blr ; ; CHECK-LE-LABEL: test47: @@ -1859,13 +1859,13 @@ ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r4, -20(r1) -; CHECK-FISL-NEXT: ld r3, -40(r1) -; CHECK-FISL-NEXT: sld r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -20(r1) +; CHECK-FISL-NEXT: ld r4, -40(r1) +; CHECK-FISL-NEXT: sld r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r4, -28(r1) -; CHECK-FISL-NEXT: ld r3, -48(r1) -; CHECK-FISL-NEXT: sld r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -28(r1) +; CHECK-FISL-NEXT: ld r4, -48(r1) +; CHECK-FISL-NEXT: sld r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -1925,13 +1925,13 @@ ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r4, -20(r1) -; CHECK-FISL-NEXT: ld r3, -40(r1) -; CHECK-FISL-NEXT: srd r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -20(r1) +; 
CHECK-FISL-NEXT: ld r4, -40(r1) +; CHECK-FISL-NEXT: srd r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r4, -28(r1) -; CHECK-FISL-NEXT: ld r3, -48(r1) -; CHECK-FISL-NEXT: srd r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -28(r1) +; CHECK-FISL-NEXT: ld r4, -48(r1) +; CHECK-FISL-NEXT: srd r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -1991,13 +1991,13 @@ ; CHECK-FISL-NEXT: stxvd2x v3, 0, r3 ; CHECK-FISL-NEXT: addi r3, r1, -48 ; CHECK-FISL-NEXT: stxvd2x v2, 0, r3 -; CHECK-FISL-NEXT: lwz r4, -20(r1) -; CHECK-FISL-NEXT: ld r3, -40(r1) -; CHECK-FISL-NEXT: srad r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -20(r1) +; CHECK-FISL-NEXT: ld r4, -40(r1) +; CHECK-FISL-NEXT: srad r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -8(r1) -; CHECK-FISL-NEXT: lwz r4, -28(r1) -; CHECK-FISL-NEXT: ld r3, -48(r1) -; CHECK-FISL-NEXT: srad r3, r3, r4 +; CHECK-FISL-NEXT: lwz r3, -28(r1) +; CHECK-FISL-NEXT: ld r4, -48(r1) +; CHECK-FISL-NEXT: srad r3, r4, r3 ; CHECK-FISL-NEXT: std r3, -16(r1) ; CHECK-FISL-NEXT: addi r3, r1, -16 ; CHECK-FISL-NEXT: lxvd2x vs0, 0, r3 @@ -2426,12 +2426,12 @@ ; CHECK-FISL: # %bb.0: ; CHECK-FISL-NEXT: # kill: def $r3 killed $r3 killed $x3 ; CHECK-FISL-NEXT: stw r3, -16(r1) -; CHECK-FISL-NEXT: addi r4, r1, -16 -; CHECK-FISL-NEXT: lxvw4x vs0, 0, r4 +; CHECK-FISL-NEXT: addi r3, r1, -16 +; CHECK-FISL-NEXT: lxvw4x vs0, 0, r3 ; CHECK-FISL-NEXT: xxspltw v2, vs0, 0 -; CHECK-FISL-NEXT: addis r4, r2, .LCPI65_0@toc@ha -; CHECK-FISL-NEXT: addi r4, r4, .LCPI65_0@toc@l -; CHECK-FISL-NEXT: lxvw4x v3, 0, r4 +; CHECK-FISL-NEXT: addis r3, r2, .LCPI65_0@toc@ha +; CHECK-FISL-NEXT: addi r3, r3, .LCPI65_0@toc@l +; CHECK-FISL-NEXT: lxvw4x v3, 0, r3 ; CHECK-FISL-NEXT: vadduwm v2, v2, v3 ; CHECK-FISL-NEXT: blr ; Index: llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll =================================================================== --- llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll +++ 
llvm/test/CodeGen/X86/2009-04-14-IllegalRegs.ll @@ -8,34 +8,34 @@ define i32 @z() nounwind ssp { ; CHECK-LABEL: z: ; CHECK: ## %bb.0: ## %entry -; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: subl $144, %esp +; CHECK-NEXT: subl $148, %esp ; CHECK-NEXT: movl L___stack_chk_guard$non_lazy_ptr, %eax ; CHECK-NEXT: movl (%eax), %eax ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movb $48, {{[0-9]+}}(%esp) -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl -; CHECK-NEXT: movb %cl, {{[0-9]+}}(%esp) +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al +; CHECK-NEXT: movb %al, {{[0-9]+}}(%esp) ; CHECK-NEXT: movb $15, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl %esp, %eax -; CHECK-NEXT: movl $8, %edx -; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esi -; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl $8, %ecx +; CHECK-NEXT: leal {{[0-9]+}}(%esp), %edx +; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: movl %eax, %edi -; CHECK-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill +; CHECK-NEXT: movl %edx, %esi ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: addl $36, %ecx +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload ; CHECK-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill -; CHECK-NEXT: movl %edx, %ecx +; CHECK-NEXT: movl %esi, %ecx ; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Reload -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi ## 4-byte Reload +; CHECK-NEXT: movl %edx, %esi ; CHECK-NEXT: rep;movsl (%esi), %es:(%edi) -; CHECK-NEXT: movb {{[0-9]+}}(%esp), %bl -; CHECK-NEXT: movb %bl, 32(%eax) -; CHECK-NEXT: movb %bl, 68(%eax) +; CHECK-NEXT: movb {{[0-9]+}}(%esp), %cl +; CHECK-NEXT: movb %cl, 32(%eax) +; CHECK-NEXT: movb %cl, 68(%eax) ; CHECK-NEXT: calll _f ; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -50,10 +50,9 @@ ; CHECK-NEXT: jne LBB0_3 ; CHECK-NEXT: ## %bb.2: ## %SP_return ; CHECK-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload -; CHECK-NEXT: addl $144, %esp +; CHECK-NEXT: addl $148, %esp ; CHECK-NEXT: popl %esi ; CHECK-NEXT: popl %edi -; CHECK-NEXT: popl %ebx ; CHECK-NEXT: retl ; CHECK-NEXT: LBB0_3: ## %CallStackCheckFailBlk ; CHECK-NEXT: calll ___stack_chk_fail Index: llvm/test/CodeGen/X86/atomic-unordered.ll =================================================================== --- llvm/test/CodeGen/X86/atomic-unordered.ll +++ llvm/test/CodeGen/X86/atomic-unordered.ll @@ -126,8 +126,8 @@ ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O0-NEXT: andl $-256, %eax -; CHECK-O0-NEXT: movl %eax, %ecx -; CHECK-O0-NEXT: movq %rcx, (%rdi) +; CHECK-O0-NEXT: # kill: def $rax killed $eax +; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: narrow_writeback_and: @@ -231,10 +231,10 @@ ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: .cfi_offset %rbx, -16 ; CHECK-O0-NEXT: xorl %eax, %eax -; CHECK-O0-NEXT: movl %eax, %ecx -; CHECK-O0-NEXT: movq %rcx, %rax -; CHECK-O0-NEXT: movq %rcx, %rdx -; CHECK-O0-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: # kill: def $rax killed $eax +; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Reload ; CHECK-O0-NEXT: lock cmpxchg16b (%rdi) ; CHECK-O0-NEXT: popq %rbx @@ -326,14 +326,14 @@ ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; CHECK-O0-NEXT: callq __atomic_load ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rax +; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rcx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdx ; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rsi -; CHECK-O0-NEXT: movq {{[0-9]+}}(%rsp), %rdi -; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %r9 # 8-byte Reload -; CHECK-O0-NEXT: 
movq %rdi, 24(%r9) -; CHECK-O0-NEXT: movq %rsi, 16(%r9) -; CHECK-O0-NEXT: movq %rdx, 8(%r9) -; CHECK-O0-NEXT: movq %rax, (%r9) +; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi # 8-byte Reload +; CHECK-O0-NEXT: movq %rsi, 24(%rdi) +; CHECK-O0-NEXT: movq %rdx, 16(%rdi) +; CHECK-O0-NEXT: movq %rcx, 8(%rdi) +; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; CHECK-O0-NEXT: addq $56, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 @@ -831,8 +831,8 @@ ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx -; CHECK-O0-NEXT: movl $15, %esi -; CHECK-O0-NEXT: divq %rsi +; CHECK-O0-NEXT: movl $15, %ecx +; CHECK-O0-NEXT: divq %rcx ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_udiv1: @@ -1024,8 +1024,8 @@ ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: xorl %ecx, %ecx ; CHECK-O0-NEXT: movl %ecx, %edx -; CHECK-O0-NEXT: movl $15, %esi -; CHECK-O0-NEXT: divq %rsi +; CHECK-O0-NEXT: movl $15, %ecx +; CHECK-O0-NEXT: divq %rcx ; CHECK-O0-NEXT: movq %rdx, %rax ; CHECK-O0-NEXT: retq ; @@ -1475,9 +1475,9 @@ ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: movq (%rsi), %rcx ; CHECK-O0-NEXT: subq %rcx, %rax -; CHECK-O0-NEXT: sete %dl +; CHECK-O0-NEXT: sete %cl ; CHECK-O0-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-O0-NEXT: movb %dl, %al +; CHECK-O0-NEXT: movb %cl, %al ; CHECK-O0-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_fold_icmp3: @@ -2076,8 +2076,8 @@ ; CHECK-O0-NEXT: movq (%rdi), %rax ; CHECK-O0-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-O0-NEXT: andl $15, %eax -; CHECK-O0-NEXT: movl %eax, %ecx -; CHECK-O0-NEXT: movq %rcx, (%rdi) +; CHECK-O0-NEXT: # kill: def $rax killed $eax +; CHECK-O0-NEXT: movq %rax, (%rdi) ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: rmw_fold_and1: @@ -2541,9 +2541,8 @@ ; CHECK-O0-CUR-LABEL: load_i8_anyext_i16: ; CHECK-O0-CUR: # %bb.0: ; CHECK-O0-CUR-NEXT: movb (%rdi), %al -; CHECK-O0-CUR-NEXT: movzbl %al, %ecx -; 
CHECK-O0-CUR-NEXT: # kill: def $cx killed $cx killed $ecx -; CHECK-O0-CUR-NEXT: movw %cx, %ax +; CHECK-O0-CUR-NEXT: movzbl %al, %eax +; CHECK-O0-CUR-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-O0-CUR-NEXT: retq ; ; CHECK-O3-CUR-LABEL: load_i8_anyext_i16: @@ -2671,13 +2670,12 @@ ; CHECK-O0: # %bb.0: ; CHECK-O0-NEXT: movb (%rdi), %al ; CHECK-O0-NEXT: movb 1(%rdi), %cl -; CHECK-O0-NEXT: movzbl %al, %edx -; CHECK-O0-NEXT: # kill: def $dx killed $dx killed $edx -; CHECK-O0-NEXT: movzbl %cl, %esi -; CHECK-O0-NEXT: # kill: def $si killed $si killed $esi -; CHECK-O0-NEXT: shlw $8, %si -; CHECK-O0-NEXT: orw %si, %dx -; CHECK-O0-NEXT: movw %dx, %ax +; CHECK-O0-NEXT: movzbl %al, %eax +; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-O0-NEXT: movzbl %cl, %ecx +; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx +; CHECK-O0-NEXT: shlw $8, %cx +; CHECK-O0-NEXT: orw %cx, %ax ; CHECK-O0-NEXT: retq ; ; CHECK-O3-LABEL: load_combine: Index: llvm/test/CodeGen/X86/atomic32.ll =================================================================== --- llvm/test/CodeGen/X86/atomic32.ll +++ llvm/test/CodeGen/X86/atomic32.ll @@ -70,8 +70,8 @@ ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill @@ -94,8 +94,8 @@ ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %dl -; X86-NEXT: testb $1, %dl +; X86-NEXT: sete %cl +; X86-NEXT: testb $1, %cl ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill @@ -124,8 +124,8 @@ ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: orl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; 
X64-NEXT: testb $1, %dl +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill @@ -148,8 +148,8 @@ ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: orl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %dl -; X86-NEXT: testb $1, %dl +; X86-NEXT: sete %cl +; X86-NEXT: testb $1, %cl ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill @@ -178,8 +178,8 @@ ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: xorl $5, %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill @@ -202,8 +202,8 @@ ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: xorl $5, %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %dl -; X86-NEXT: testb $1, %dl +; X86-NEXT: sete %cl +; X86-NEXT: testb $1, %cl ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill @@ -234,8 +234,8 @@ ; X64-NEXT: andl %edx, %ecx ; X64-NEXT: notl %ecx ; X64-NEXT: lock cmpxchgl %ecx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB5_2 ; X64-NEXT: jmp .LBB5_1 @@ -244,7 +244,6 @@ ; ; X86-LABEL: atomic_fetch_nand32: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx ; X86-NEXT: subl $8, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl sc32, %ecx @@ -258,14 +257,13 @@ ; X86-NEXT: andl %edx, %ecx ; X86-NEXT: notl %ecx ; X86-NEXT: lock cmpxchgl %ecx, sc32 -; X86-NEXT: sete %bl -; X86-NEXT: testb $1, %bl +; X86-NEXT: sete %cl +; X86-NEXT: 
testb $1, %cl ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill ; X86-NEXT: jne .LBB5_2 ; X86-NEXT: jmp .LBB5_1 ; X86-NEXT: .LBB5_2: # %atomicrmw.end ; X86-NEXT: addl $8, %esp -; X86-NEXT: popl %ebx ; X86-NEXT: retl %t1 = atomicrmw nand i32* @sc32, i32 %x acquire ret void @@ -285,8 +283,8 @@ ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmovgel %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB6_2 @@ -296,7 +294,6 @@ ; ; X86-CMOV-LABEL: atomic_fetch_max32: ; X86-CMOV: # %bb.0: -; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -310,20 +307,18 @@ ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmovgel %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %bl -; X86-CMOV-NEXT: testb $1, %bl +; X86-CMOV-NEXT: sete %dl +; X86-CMOV-NEXT: testb $1, %dl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB6_2 ; X86-CMOV-NEXT: jmp .LBB6_1 ; X86-CMOV-NEXT: .LBB6_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp -; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_max32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -352,20 +347,18 @@ ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %bl -; X86-NOCMOV-NEXT: testb $1, %bl +; X86-NOCMOV-NEXT: sete %dl +; X86-NOCMOV-NEXT: testb $1, %dl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne 
.LBB6_2 ; X86-NOCMOV-NEXT: jmp .LBB6_1 ; X86-NOCMOV-NEXT: .LBB6_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi -; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_max32: ; X86-NOX87: # %bb.0: -; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -394,15 +387,14 @@ ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %bl -; X86-NOX87-NEXT: testb $1, %bl +; X86-NOX87-NEXT: sete %dl +; X86-NOX87-NEXT: testb $1, %dl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB6_2 ; X86-NOX87-NEXT: jmp .LBB6_1 ; X86-NOX87-NEXT: .LBB6_2: # %atomicrmw.end ; X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi -; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw max i32* @sc32, i32 %x acquire ret void @@ -422,8 +414,8 @@ ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmovlel %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB7_2 @@ -433,7 +425,6 @@ ; ; X86-CMOV-LABEL: atomic_fetch_min32: ; X86-CMOV: # %bb.0: -; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -447,20 +438,18 @@ ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmovlel %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %bl -; X86-CMOV-NEXT: testb $1, %bl +; X86-CMOV-NEXT: sete %dl +; X86-CMOV-NEXT: testb $1, %dl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: 
jne .LBB7_2 ; X86-CMOV-NEXT: jmp .LBB7_1 ; X86-CMOV-NEXT: .LBB7_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp -; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_min32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -489,20 +478,18 @@ ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %bl -; X86-NOCMOV-NEXT: testb $1, %bl +; X86-NOCMOV-NEXT: sete %dl +; X86-NOCMOV-NEXT: testb $1, %dl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB7_2 ; X86-NOCMOV-NEXT: jmp .LBB7_1 ; X86-NOCMOV-NEXT: .LBB7_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi -; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_min32: ; X86-NOX87: # %bb.0: -; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -531,15 +518,14 @@ ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %bl -; X86-NOX87-NEXT: testb $1, %bl +; X86-NOX87-NEXT: sete %dl +; X86-NOX87-NEXT: testb $1, %dl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB7_2 ; X86-NOX87-NEXT: jmp .LBB7_1 ; X86-NOX87-NEXT: .LBB7_2: # %atomicrmw.end ; X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi -; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw min i32* @sc32, i32 %x acquire ret void @@ -559,8 +545,8 @@ ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmoval %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: 
movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB8_2 @@ -570,7 +556,6 @@ ; ; X86-CMOV-LABEL: atomic_fetch_umax32: ; X86-CMOV: # %bb.0: -; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -584,20 +569,18 @@ ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmoval %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %bl -; X86-CMOV-NEXT: testb $1, %bl +; X86-CMOV-NEXT: sete %dl +; X86-CMOV-NEXT: testb $1, %dl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB8_2 ; X86-CMOV-NEXT: jmp .LBB8_1 ; X86-CMOV-NEXT: .LBB8_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp -; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_umax32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -626,20 +609,18 @@ ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %bl -; X86-NOCMOV-NEXT: testb $1, %bl +; X86-NOCMOV-NEXT: sete %dl +; X86-NOCMOV-NEXT: testb $1, %dl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB8_2 ; X86-NOCMOV-NEXT: jmp .LBB8_1 ; X86-NOCMOV-NEXT: .LBB8_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi -; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_umax32: ; X86-NOX87: # %bb.0: -; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -668,15 +649,14 @@ ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte 
Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %bl -; X86-NOX87-NEXT: testb $1, %bl +; X86-NOX87-NEXT: sete %dl +; X86-NOX87-NEXT: testb $1, %dl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB8_2 ; X86-NOX87-NEXT: jmp .LBB8_1 ; X86-NOX87-NEXT: .LBB8_2: # %atomicrmw.end ; X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi -; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw umax i32* @sc32, i32 %x acquire ret void @@ -696,8 +676,8 @@ ; X64-NEXT: subl %edx, %ecx ; X64-NEXT: cmovbel %eax, %edx ; X64-NEXT: lock cmpxchgl %edx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill ; X64-NEXT: jne .LBB9_2 @@ -707,7 +687,6 @@ ; ; X86-CMOV-LABEL: atomic_fetch_umin32: ; X86-CMOV: # %bb.0: -; X86-CMOV-NEXT: pushl %ebx ; X86-CMOV-NEXT: subl $12, %esp ; X86-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-CMOV-NEXT: movl sc32, %ecx @@ -721,20 +700,18 @@ ; X86-CMOV-NEXT: subl %edx, %ecx ; X86-CMOV-NEXT: cmovbel %eax, %edx ; X86-CMOV-NEXT: lock cmpxchgl %edx, sc32 -; X86-CMOV-NEXT: sete %bl -; X86-CMOV-NEXT: testb $1, %bl +; X86-CMOV-NEXT: sete %dl +; X86-CMOV-NEXT: testb $1, %dl ; X86-CMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-CMOV-NEXT: movl %ecx, (%esp) # 4-byte Spill ; X86-CMOV-NEXT: jne .LBB9_2 ; X86-CMOV-NEXT: jmp .LBB9_1 ; X86-CMOV-NEXT: .LBB9_2: # %atomicrmw.end ; X86-CMOV-NEXT: addl $12, %esp -; X86-CMOV-NEXT: popl %ebx ; X86-CMOV-NEXT: retl ; ; X86-NOCMOV-LABEL: atomic_fetch_umin32: ; X86-NOCMOV: # %bb.0: -; X86-NOCMOV-NEXT: pushl %ebx ; X86-NOCMOV-NEXT: pushl %esi ; X86-NOCMOV-NEXT: subl $20, %esp ; X86-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -763,20 +740,18 @@ ; X86-NOCMOV-NEXT: movl %ecx, %eax ; X86-NOCMOV-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOCMOV-NEXT: 
lock cmpxchgl %edx, sc32 -; X86-NOCMOV-NEXT: sete %bl -; X86-NOCMOV-NEXT: testb $1, %bl +; X86-NOCMOV-NEXT: sete %dl +; X86-NOCMOV-NEXT: testb $1, %dl ; X86-NOCMOV-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOCMOV-NEXT: jne .LBB9_2 ; X86-NOCMOV-NEXT: jmp .LBB9_1 ; X86-NOCMOV-NEXT: .LBB9_2: # %atomicrmw.end ; X86-NOCMOV-NEXT: addl $20, %esp ; X86-NOCMOV-NEXT: popl %esi -; X86-NOCMOV-NEXT: popl %ebx ; X86-NOCMOV-NEXT: retl ; ; X86-NOX87-LABEL: atomic_fetch_umin32: ; X86-NOX87: # %bb.0: -; X86-NOX87-NEXT: pushl %ebx ; X86-NOX87-NEXT: pushl %esi ; X86-NOX87-NEXT: subl $20, %esp ; X86-NOX87-NEXT: movl {{[0-9]+}}(%esp), %eax @@ -805,15 +780,14 @@ ; X86-NOX87-NEXT: movl %ecx, %eax ; X86-NOX87-NEXT: movl (%esp), %edx # 4-byte Reload ; X86-NOX87-NEXT: lock cmpxchgl %edx, sc32 -; X86-NOX87-NEXT: sete %bl -; X86-NOX87-NEXT: testb $1, %bl +; X86-NOX87-NEXT: sete %dl +; X86-NOX87-NEXT: testb $1, %dl ; X86-NOX87-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; X86-NOX87-NEXT: jne .LBB9_2 ; X86-NOX87-NEXT: jmp .LBB9_1 ; X86-NOX87-NEXT: .LBB9_2: # %atomicrmw.end ; X86-NOX87-NEXT: addl $20, %esp ; X86-NOX87-NEXT: popl %esi -; X86-NOX87-NEXT: popl %ebx ; X86-NOX87-NEXT: retl %t1 = atomicrmw umin i32* @sc32, i32 %x acquire ret void Index: llvm/test/CodeGen/X86/atomic64.ll =================================================================== --- llvm/test/CodeGen/X86/atomic64.ll +++ llvm/test/CodeGen/X86/atomic64.ll @@ -137,12 +137,12 @@ ; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl $5, %ecx -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil -; X64-NEXT: movq %rax, %rdx -; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill +; X64-NEXT: # kill: def $rcx killed $ecx +; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl +; X64-NEXT: movq %rax, %rcx +; X64-NEXT: movq %rcx, 
{{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB2_2 ; X64-NEXT: jmp .LBB2_1 @@ -202,8 +202,8 @@ ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: orq $5, %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -265,8 +265,8 @@ ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: xorq $5, %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) -; X64-NEXT: sete %dl -; X64-NEXT: testb $1, %dl +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movq %rax, %rcx ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill @@ -330,8 +330,8 @@ ; X64-NEXT: andq %rdx, %rcx ; X64-NEXT: notq %rcx ; X64-NEXT: lock cmpxchgq %rcx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil +; X64-NEXT: sete %cl +; X64-NEXT: testb $1, %cl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB5_2 ; X64-NEXT: jmp .LBB5_1 @@ -373,8 +373,8 @@ ; X64-NEXT: subq %rdx, %rcx ; X64-NEXT: cmovgeq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB6_2 @@ -473,8 +473,8 @@ ; X64-NEXT: subq %rdx, %rcx ; X64-NEXT: cmovleq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB7_2 @@ -571,8 +571,8 @@ ; X64-NEXT: 
subq %rdx, %rcx ; X64-NEXT: cmovaq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB8_2 @@ -669,8 +669,8 @@ ; X64-NEXT: subq %rdx, %rcx ; X64-NEXT: cmovbeq %rax, %rdx ; X64-NEXT: lock cmpxchgq %rdx, {{.*}}(%rip) -; X64-NEXT: sete %sil -; X64-NEXT: testb $1, %sil +; X64-NEXT: sete %dl +; X64-NEXT: testb $1, %dl ; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill ; X64-NEXT: jne .LBB9_2 Index: llvm/test/CodeGen/X86/avx-load-store.ll =================================================================== --- llvm/test/CodeGen/X86/avx-load-store.ll +++ llvm/test/CodeGen/X86/avx-load-store.ll @@ -175,8 +175,8 @@ ; CHECK_O0: # %bb.0: ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 -; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> @@ -197,8 +197,8 @@ ; CHECK_O0: # %bb.0: ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 -; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %Z = shufflevector <4 x i32>%A, <4 x i32>%B, <8 x i32> @@ -239,10 +239,10 @@ ; CHECK_O0-NEXT: .LBB9_3: # %cif_mixed_test_all ; CHECK_O0-NEXT: vmovdqa {{.*#+}} xmm0 = [4294967295,0,0,0] ; CHECK_O0-NEXT: vmovdqa %xmm0, %xmm0 -; CHECK_O0-NEXT: vmovaps %xmm0, %xmm1 +; CHECK_O0-NEXT: # kill: def $ymm0 killed $xmm0 ; 
CHECK_O0-NEXT: # implicit-def: $rax -; CHECK_O0-NEXT: # implicit-def: $ymm2 -; CHECK_O0-NEXT: vmaskmovps %ymm2, %ymm1, (%rax) +; CHECK_O0-NEXT: # implicit-def: $ymm1 +; CHECK_O0-NEXT: vmaskmovps %ymm1, %ymm0, (%rax) ; CHECK_O0-NEXT: .LBB9_4: # %cif_mixed_test_any_check allocas: br i1 undef, label %cif_mask_all, label %cif_mask_mixed @@ -276,8 +276,8 @@ ; CHECK_O0-NEXT: vmovdqu 16(%rsi), %xmm1 ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 -; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %b = load <8 x i32>, <8 x i32>* %bp, align 1 @@ -321,8 +321,8 @@ ; CHECK_O0-NEXT: vmovdqa 16(%rsi), %xmm1 ; CHECK_O0-NEXT: # implicit-def: $ymm2 ; CHECK_O0-NEXT: vmovaps %xmm0, %xmm2 -; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm2 -; CHECK_O0-NEXT: vmovdqu %ymm2, (%rdi) +; CHECK_O0-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm0 +; CHECK_O0-NEXT: vmovdqu %ymm0, (%rdi) ; CHECK_O0-NEXT: vzeroupper ; CHECK_O0-NEXT: retq %b = load <4 x i64>, <4 x i64>* %bp, align 16 Index: llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll =================================================================== --- llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll +++ llvm/test/CodeGen/X86/avx512-mask-zext-bugfix.ll @@ -40,22 +40,20 @@ ; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 16-byte Reload ; CHECK-NEXT: vpmovd2m %xmm0, %k0 ; CHECK-NEXT: kmovq %k0, %k1 -; CHECK-NEXT: kmovd %k0, %esi -; CHECK-NEXT: ## kill: def $sil killed $sil killed $esi -; CHECK-NEXT: movzbl %sil, %edi -; CHECK-NEXT: ## kill: def $di killed $di killed $edi -; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload -; CHECK-NEXT: movw %di, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill -; CHECK-NEXT: movq %rcx, %rdi -; CHECK-NEXT: movl $4, %r8d -; CHECK-NEXT: movl %r8d, %esi -; CHECK-NEXT: movl %r8d, %edx +; 
CHECK-NEXT: kmovd %k0, %ecx +; CHECK-NEXT: ## kill: def $cl killed $cl killed $ecx +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: ## kill: def $cx killed $cx killed $ecx +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdi ## 8-byte Reload +; CHECK-NEXT: movl $4, %edx +; CHECK-NEXT: movl %edx, %esi ; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Spill ; CHECK-NEXT: kmovw %k1, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill +; CHECK-NEXT: movw %cx, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill ; CHECK-NEXT: callq _calc_expected_mask_val ; CHECK-NEXT: ## kill: def $ax killed $ax killed $rax -; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %r9w ## 2-byte Reload -; CHECK-NEXT: movzwl %r9w, %edi +; CHECK-NEXT: movw {{[-0-9]+}}(%r{{[sb]}}p), %cx ## 2-byte Reload +; CHECK-NEXT: movzwl %cx, %edi ; CHECK-NEXT: movzwl %ax, %esi ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx ## 8-byte Reload ; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx ## 8-byte Reload Index: llvm/test/CodeGen/X86/crash-O0.ll =================================================================== --- llvm/test/CodeGen/X86/crash-O0.ll +++ llvm/test/CodeGen/X86/crash-O0.ll @@ -79,11 +79,12 @@ ; CHECK-NEXT: movq %rsp, %rbp ; CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: ## kill: def $rax killed $eax +; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: cqto -; CHECK-NEXT: movslq %edi, %rsi -; CHECK-NEXT: idivq (%rcx,%rsi,8) +; CHECK-NEXT: movslq %edi, %rcx +; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rsi ## 8-byte Reload +; CHECK-NEXT: idivq (%rsi,%rcx,8) ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: retq %gep = getelementptr i64, i64* null, i32 %V Index: llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll =================================================================== --- llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll +++ llvm/test/CodeGen/X86/extend-set-cc-uses-dbg.ll @@ -7,8 +7,8 @@ bb: 
%tmp = load i32, i32* %p, align 4, !dbg !7 ; CHECK: $eax = MOV32rm killed {{.*}} $rdi, {{.*}} debug-location !7 :: (load 4 from %ir.p) - ; CHECK-NEXT: $ecx = MOV32rr killed $eax, implicit-def $rcx, debug-location !7 - ; CHECK-NEXT: $rdx = MOV64rr $rcx, debug-location !7 + ; CHECK-NEXT: $rax = KILL killed renamable $eax, debug-location !7 + ; CHECK-NEXT: $rcx = MOV64rr $rax, debug-location !7 switch i32 %tmp, label %bb7 [ i32 0, label %bb1 Index: llvm/test/CodeGen/X86/fast-isel-nontemporal.ll =================================================================== --- llvm/test/CodeGen/X86/fast-isel-nontemporal.ll +++ llvm/test/CodeGen/X86/fast-isel-nontemporal.ll @@ -1013,11 +1013,11 @@ ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm2, %xmm1 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm2 +; AVX1-NEXT: vmovaps %xmm1, %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xfloat: @@ -1067,11 +1067,11 @@ ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm2, %xmm1 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm2 +; AVX1-NEXT: vmovaps %xmm1, %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xdouble: @@ -1121,11 +1121,11 @@ ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 
$1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm2, %xmm1 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm2 +; AVX1-NEXT: vmovaps %xmm1, %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt64xi8: @@ -1175,11 +1175,11 @@ ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm2, %xmm1 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm2 +; AVX1-NEXT: vmovaps %xmm1, %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt32xi16: @@ -1229,11 +1229,11 @@ ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm2, %xmm1 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 -; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm2 +; AVX1-NEXT: vmovaps %xmm1, %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt16xi32: @@ -1283,11 +1283,11 @@ ; AVX1-NEXT: vmovaps %xmm0, %xmm1 ; AVX1-NEXT: vmovntdqa 16(%rdi), %xmm0 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 -; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm2 -; AVX1-NEXT: # implicit-def: $ymm1 -; AVX1-NEXT: vmovaps %xmm2, %xmm1 -; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm2 -; AVX1-NEXT: 
vinsertf128 $1, %xmm2, %ymm1, %ymm1 +; AVX1-NEXT: vmovntdqa 32(%rdi), %xmm1 +; AVX1-NEXT: # implicit-def: $ymm2 +; AVX1-NEXT: vmovaps %xmm1, %xmm2 +; AVX1-NEXT: vmovntdqa 48(%rdi), %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 ; AVX1-NEXT: retq ; ; AVX2-LABEL: test_load_nt8xi64: Index: llvm/test/CodeGen/X86/pr1489.ll =================================================================== --- llvm/test/CodeGen/X86/pr1489.ll +++ llvm/test/CodeGen/X86/pr1489.ll @@ -16,9 +16,9 @@ ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E ; CHECK-NEXT: calll _lrintf ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: setl %al +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl @@ -42,9 +42,9 @@ ; CHECK-NEXT: movl $-1236950581, (%eax) ## imm = 0xB645A1CB ; CHECK-NEXT: calll _lrint ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: setl %al +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl @@ -67,9 +67,9 @@ ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E ; CHECK-NEXT: calll _lrintf ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: setl %al +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl @@ -90,9 +90,9 @@ ; CHECK-NEXT: movl $1082126238, (%eax) ## imm = 0x407FEF9E ; CHECK-NEXT: calll _lrintf ; CHECK-NEXT: cmpl $1, %eax -; CHECK-NEXT: setl %cl -; CHECK-NEXT: andb $1, %cl -; CHECK-NEXT: movzbl %cl, %eax +; CHECK-NEXT: setl %al +; CHECK-NEXT: andb $1, %al +; CHECK-NEXT: movzbl %al, %eax ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: popl %ebp ; CHECK-NEXT: retl Index: llvm/test/CodeGen/X86/pr27591.ll 
=================================================================== --- llvm/test/CodeGen/X86/pr27591.ll +++ llvm/test/CodeGen/X86/pr27591.ll @@ -9,9 +9,9 @@ ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: cmpl $0, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %ecx -; CHECK-NEXT: andl $1, %ecx -; CHECK-NEXT: movl %ecx, %edi +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: movl %eax, %edi ; CHECK-NEXT: callq callee1 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq @@ -27,10 +27,10 @@ ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: cmpl $0, %edi ; CHECK-NEXT: setne %al -; CHECK-NEXT: movzbl %al, %ecx -; CHECK-NEXT: andl $1, %ecx -; CHECK-NEXT: negl %ecx -; CHECK-NEXT: movl %ecx, %edi +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: negl %eax +; CHECK-NEXT: movl %eax, %edi ; CHECK-NEXT: callq callee2 ; CHECK-NEXT: popq %rax ; CHECK-NEXT: retq Index: llvm/test/CodeGen/X86/pr30430.ll =================================================================== --- llvm/test/CodeGen/X86/pr30430.ll +++ llvm/test/CodeGen/X86/pr30430.ll @@ -75,28 +75,28 @@ ; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] ; CHECK-NEXT: # implicit-def: $ymm2 ; CHECK-NEXT: vmovaps %xmm1, %xmm2 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2 -; CHECK-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3] -; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3] -; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[0] +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 ; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] +; CHECK-NEXT: vmovss {{.*#+}} xmm2 = 
mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0],xmm1[3] +; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm2[0] +; CHECK-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm3[0],xmm1[0],xmm3[2,3] +; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm3[0],xmm1[3] +; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm3[0],xmm2[3] ; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero -; CHECK-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0] +; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm3[0] ; CHECK-NEXT: # implicit-def: $ymm3 -; CHECK-NEXT: vmovaps %xmm1, %xmm3 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm3 -; CHECK-NEXT: # implicit-def: $zmm24 -; CHECK-NEXT: vmovaps %zmm3, %zmm24 -; CHECK-NEXT: vinsertf64x4 $1, %ymm2, %zmm24, %zmm24 -; CHECK-NEXT: vmovaps %zmm24, {{[0-9]+}}(%rsp) +; CHECK-NEXT: vmovaps %xmm2, %xmm3 +; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1 +; CHECK-NEXT: # implicit-def: $zmm2 +; CHECK-NEXT: vmovaps %ymm1, %ymm2 +; CHECK-NEXT: vinsertf64x4 $1, %ymm0, %zmm2, %zmm0 +; CHECK-NEXT: vmovaps %zmm0, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vmovaps {{[0-9]+}}(%rsp), %zmm0 ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp Index: llvm/test/CodeGen/X86/pr30813.ll =================================================================== --- llvm/test/CodeGen/X86/pr30813.ll +++ llvm/test/CodeGen/X86/pr30813.ll @@ -1,9 +1,8 @@ ; RUN: llc -mtriple=x86_64-linux-gnu -O0 %s -o - | FileCheck %s ; CHECK: patatino: ; CHECK: .cfi_startproc -; CHECK: movzwl (%rax), [[REG0:%e[abcd]x]] -; CHECK: movl [[REG0]], %e[[REG1C:[abcd]]]x -; CHECK: movq %r[[REG1C]]x, ({{%r[abcd]x}}) +; CHECK: movzwl (%rax), %e[[REG0:[abcd]x]] +; CHECK: movq 
%r[[REG0]], ({{%r[abcd]x}}) ; CHECK: retq define void @patatino() { Index: llvm/test/CodeGen/X86/pr32241.ll =================================================================== --- llvm/test/CodeGen/X86/pr32241.ll +++ llvm/test/CodeGen/X86/pr32241.ll @@ -23,14 +23,14 @@ ; CHECK-NEXT: .LBB0_2: # %lor.end ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %ecx -; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; CHECK-NEXT: cmpl %ecx, %edx +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; CHECK-NEXT: cmpl %eax, %ecx ; CHECK-NEXT: setl %al ; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %ecx -; CHECK-NEXT: xorl $-1, %ecx -; CHECK-NEXT: cmpl $0, %ecx +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: xorl $-1, %eax +; CHECK-NEXT: cmpl $0, %eax ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill ; CHECK-NEXT: jne .LBB0_4 @@ -42,9 +42,9 @@ ; CHECK-NEXT: .LBB0_4: # %lor.end5 ; CHECK-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %al # 1-byte Reload ; CHECK-NEXT: andb $1, %al -; CHECK-NEXT: movzbl %al, %ecx -; CHECK-NEXT: # kill: def $cx killed $cx killed $ecx -; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp) +; CHECK-NEXT: movzbl %al, %eax +; CHECK-NEXT: # kill: def $ax killed $ax killed $eax +; CHECK-NEXT: movw %ax, {{[0-9]+}}(%esp) ; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: addl $16, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 4 Index: llvm/test/CodeGen/X86/pr32284.ll =================================================================== --- llvm/test/CodeGen/X86/pr32284.ll +++ llvm/test/CodeGen/X86/pr32284.ll @@ -10,28 +10,28 @@ ; X86-O0-LABEL: foo: ; X86-O0: # %bb.0: # %entry ; X86-O0-NEXT: xorl %eax, %eax -; X86-O0-NEXT: movl %eax, %ecx -; X86-O0-NEXT: xorl %eax, %eax +; X86-O0-NEXT: # kill: def $rax killed $eax +; X86-O0-NEXT: xorl %ecx, %ecx ; X86-O0-NEXT: movzbl c, %edx -; X86-O0-NEXT: subl %edx, %eax 
-; X86-O0-NEXT: movslq %eax, %rsi -; X86-O0-NEXT: subq %rsi, %rcx -; X86-O0-NEXT: # kill: def $cl killed $cl killed $rcx -; X86-O0-NEXT: cmpb $0, %cl -; X86-O0-NEXT: setne %cl -; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movb %cl, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: subl %edx, %ecx +; X86-O0-NEXT: movslq %ecx, %rcx +; X86-O0-NEXT: subq %rcx, %rax +; X86-O0-NEXT: # kill: def $al killed $al killed $rax +; X86-O0-NEXT: cmpb $0, %al +; X86-O0-NEXT: setne %al +; X86-O0-NEXT: andb $1, %al +; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; X86-O0-NEXT: cmpb $0, c -; X86-O0-NEXT: setne %cl -; X86-O0-NEXT: xorb $-1, %cl -; X86-O0-NEXT: xorb $-1, %cl -; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movzbl %cl, %eax -; X86-O0-NEXT: movzbl c, %edx -; X86-O0-NEXT: cmpl %edx, %eax -; X86-O0-NEXT: setle %cl -; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movzbl %cl, %eax +; X86-O0-NEXT: setne %al +; X86-O0-NEXT: xorb $-1, %al +; X86-O0-NEXT: xorb $-1, %al +; X86-O0-NEXT: andb $1, %al +; X86-O0-NEXT: movzbl %al, %eax +; X86-O0-NEXT: movzbl c, %ecx +; X86-O0-NEXT: cmpl %ecx, %eax +; X86-O0-NEXT: setle %al +; X86-O0-NEXT: andb $1, %al +; X86-O0-NEXT: movzbl %al, %eax ; X86-O0-NEXT: movl %eax, -{{[0-9]+}}(%rsp) ; X86-O0-NEXT: retq ; @@ -63,13 +63,13 @@ ; 686-O0-NEXT: xorb $-1, %al ; 686-O0-NEXT: xorb $-1, %al ; 686-O0-NEXT: andb $1, %al -; 686-O0-NEXT: movzbl %al, %ecx -; 686-O0-NEXT: movzbl c, %edx -; 686-O0-NEXT: cmpl %edx, %ecx +; 686-O0-NEXT: movzbl %al, %eax +; 686-O0-NEXT: movzbl c, %ecx +; 686-O0-NEXT: cmpl %ecx, %eax ; 686-O0-NEXT: setle %al ; 686-O0-NEXT: andb $1, %al -; 686-O0-NEXT: movzbl %al, %ecx -; 686-O0-NEXT: movl %ecx, (%esp) +; 686-O0-NEXT: movzbl %al, %eax +; 686-O0-NEXT: movl %eax, (%esp) ; 686-O0-NEXT: addl $8, %esp ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl @@ -126,33 +126,33 @@ ; X86-O0-NEXT: movabsq $8381627093, %rcx # imm = 0x1F3957AD5 ; X86-O0-NEXT: addq %rcx, %rax ; X86-O0-NEXT: cmpq $0, %rax -; X86-O0-NEXT: setne %dl -; X86-O0-NEXT: andb $1, %dl -; 
X86-O0-NEXT: movb %dl, -{{[0-9]+}}(%rsp) -; X86-O0-NEXT: movl var_5, %esi -; X86-O0-NEXT: xorl $-1, %esi -; X86-O0-NEXT: cmpl $0, %esi -; X86-O0-NEXT: setne %dl -; X86-O0-NEXT: xorb $-1, %dl -; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %esi -; X86-O0-NEXT: movl %esi, %eax +; X86-O0-NEXT: setne %al +; X86-O0-NEXT: andb $1, %al +; X86-O0-NEXT: movb %al, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: movl var_5, %eax +; X86-O0-NEXT: xorl $-1, %eax +; X86-O0-NEXT: cmpl $0, %eax +; X86-O0-NEXT: setne %al +; X86-O0-NEXT: xorb $-1, %al +; X86-O0-NEXT: andb $1, %al +; X86-O0-NEXT: movzbl %al, %eax +; X86-O0-NEXT: # kill: def $rax killed $eax ; X86-O0-NEXT: movslq var_5, %rcx ; X86-O0-NEXT: addq $7093, %rcx # imm = 0x1BB5 ; X86-O0-NEXT: cmpq %rcx, %rax -; X86-O0-NEXT: setg %dl -; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %esi -; X86-O0-NEXT: movl %esi, %eax +; X86-O0-NEXT: setg %al +; X86-O0-NEXT: andb $1, %al +; X86-O0-NEXT: movzbl %al, %eax +; X86-O0-NEXT: # kill: def $rax killed $eax ; X86-O0-NEXT: movq %rax, var_57 -; X86-O0-NEXT: movl var_5, %esi -; X86-O0-NEXT: xorl $-1, %esi -; X86-O0-NEXT: cmpl $0, %esi -; X86-O0-NEXT: setne %dl -; X86-O0-NEXT: xorb $-1, %dl -; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %esi -; X86-O0-NEXT: movl %esi, %eax +; X86-O0-NEXT: movl var_5, %eax +; X86-O0-NEXT: xorl $-1, %eax +; X86-O0-NEXT: cmpl $0, %eax +; X86-O0-NEXT: setne %al +; X86-O0-NEXT: xorb $-1, %al +; X86-O0-NEXT: andb $1, %al +; X86-O0-NEXT: movzbl %al, %eax +; X86-O0-NEXT: # kill: def $rax killed $eax ; X86-O0-NEXT: movq %rax, _ZN8struct_210member_2_0E ; X86-O0-NEXT: retq ; @@ -178,20 +178,17 @@ ; ; 686-O0-LABEL: f1: ; 686-O0: # %bb.0: # %entry -; 686-O0-NEXT: pushl %ebp -; 686-O0-NEXT: .cfi_def_cfa_offset 8 ; 686-O0-NEXT: pushl %ebx -; 686-O0-NEXT: .cfi_def_cfa_offset 12 +; 686-O0-NEXT: .cfi_def_cfa_offset 8 ; 686-O0-NEXT: pushl %edi -; 686-O0-NEXT: .cfi_def_cfa_offset 16 +; 686-O0-NEXT: .cfi_def_cfa_offset 12 ; 686-O0-NEXT: pushl %esi -; 686-O0-NEXT: 
.cfi_def_cfa_offset 20 +; 686-O0-NEXT: .cfi_def_cfa_offset 16 ; 686-O0-NEXT: subl $1, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 21 -; 686-O0-NEXT: .cfi_offset %esi, -20 -; 686-O0-NEXT: .cfi_offset %edi, -16 -; 686-O0-NEXT: .cfi_offset %ebx, -12 -; 686-O0-NEXT: .cfi_offset %ebp, -8 +; 686-O0-NEXT: .cfi_def_cfa_offset 17 +; 686-O0-NEXT: .cfi_offset %esi, -16 +; 686-O0-NEXT: .cfi_offset %edi, -12 +; 686-O0-NEXT: .cfi_offset %ebx, -8 ; 686-O0-NEXT: movl var_5, %eax ; 686-O0-NEXT: movl %eax, %ecx ; 686-O0-NEXT: sarl $31, %ecx @@ -217,18 +214,16 @@ ; 686-O0-NEXT: movl var_5, %edi ; 686-O0-NEXT: subl $-1, %edi ; 686-O0-NEXT: sete %bl -; 686-O0-NEXT: movzbl %bl, %ebp -; 686-O0-NEXT: movl %ebp, _ZN8struct_210member_2_0E +; 686-O0-NEXT: movzbl %bl, %ebx +; 686-O0-NEXT: movl %ebx, _ZN8struct_210member_2_0E ; 686-O0-NEXT: movl $0, _ZN8struct_210member_2_0E+4 ; 686-O0-NEXT: addl $1, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 20 -; 686-O0-NEXT: popl %esi ; 686-O0-NEXT: .cfi_def_cfa_offset 16 -; 686-O0-NEXT: popl %edi +; 686-O0-NEXT: popl %esi ; 686-O0-NEXT: .cfi_def_cfa_offset 12 -; 686-O0-NEXT: popl %ebx +; 686-O0-NEXT: popl %edi ; 686-O0-NEXT: .cfi_def_cfa_offset 8 -; 686-O0-NEXT: popl %ebp +; 686-O0-NEXT: popl %ebx ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl ; @@ -310,25 +305,25 @@ ; X86-O0-NEXT: setne %cl ; X86-O0-NEXT: xorb $-1, %cl ; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movzbl %cl, %edx -; X86-O0-NEXT: xorl %edx, %eax +; X86-O0-NEXT: movzbl %cl, %ecx +; X86-O0-NEXT: xorl %ecx, %eax ; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax ; X86-O0-NEXT: movw %ax, -{{[0-9]+}}(%rsp) -; X86-O0-NEXT: movzbl var_7, %edx -; X86-O0-NEXT: # kill: def $dx killed $dx killed $edx -; X86-O0-NEXT: cmpw $0, %dx -; X86-O0-NEXT: setne %cl -; X86-O0-NEXT: xorb $-1, %cl -; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movzbl %cl, %esi -; X86-O0-NEXT: movzbl var_7, %edi -; X86-O0-NEXT: cmpl %edi, %esi -; X86-O0-NEXT: sete %cl -; X86-O0-NEXT: andb $1, %cl -; X86-O0-NEXT: movzbl %cl, 
%esi -; X86-O0-NEXT: # kill: def $si killed $si killed $esi -; X86-O0-NEXT: # implicit-def: $r8 -; X86-O0-NEXT: movw %si, (%r8) +; X86-O0-NEXT: movzbl var_7, %eax +; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax +; X86-O0-NEXT: cmpw $0, %ax +; X86-O0-NEXT: setne %al +; X86-O0-NEXT: xorb $-1, %al +; X86-O0-NEXT: andb $1, %al +; X86-O0-NEXT: movzbl %al, %eax +; X86-O0-NEXT: movzbl var_7, %ecx +; X86-O0-NEXT: cmpl %ecx, %eax +; X86-O0-NEXT: sete %al +; X86-O0-NEXT: andb $1, %al +; X86-O0-NEXT: movzbl %al, %eax +; X86-O0-NEXT: # kill: def $ax killed $ax killed $eax +; X86-O0-NEXT: # implicit-def: $rcx +; X86-O0-NEXT: movw %ax, (%rcx) ; X86-O0-NEXT: retq ; ; X64-LABEL: f2: @@ -350,43 +345,33 @@ ; ; 686-O0-LABEL: f2: ; 686-O0: # %bb.0: # %entry -; 686-O0-NEXT: pushl %edi -; 686-O0-NEXT: .cfi_def_cfa_offset 8 -; 686-O0-NEXT: pushl %esi -; 686-O0-NEXT: .cfi_def_cfa_offset 12 ; 686-O0-NEXT: subl $2, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 14 -; 686-O0-NEXT: .cfi_offset %esi, -12 -; 686-O0-NEXT: .cfi_offset %edi, -8 +; 686-O0-NEXT: .cfi_def_cfa_offset 6 ; 686-O0-NEXT: movzbl var_7, %eax ; 686-O0-NEXT: cmpb $0, var_7 ; 686-O0-NEXT: setne %cl ; 686-O0-NEXT: xorb $-1, %cl ; 686-O0-NEXT: andb $1, %cl -; 686-O0-NEXT: movzbl %cl, %edx -; 686-O0-NEXT: xorl %edx, %eax +; 686-O0-NEXT: movzbl %cl, %ecx +; 686-O0-NEXT: xorl %ecx, %eax ; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax ; 686-O0-NEXT: movw %ax, (%esp) -; 686-O0-NEXT: movzbl var_7, %edx -; 686-O0-NEXT: # kill: def $dx killed $dx killed $edx -; 686-O0-NEXT: cmpw $0, %dx -; 686-O0-NEXT: setne %cl -; 686-O0-NEXT: xorb $-1, %cl -; 686-O0-NEXT: andb $1, %cl -; 686-O0-NEXT: movzbl %cl, %esi -; 686-O0-NEXT: movzbl var_7, %edi -; 686-O0-NEXT: cmpl %edi, %esi -; 686-O0-NEXT: sete %cl -; 686-O0-NEXT: andb $1, %cl -; 686-O0-NEXT: movzbl %cl, %esi -; 686-O0-NEXT: # kill: def $si killed $si killed $esi -; 686-O0-NEXT: # implicit-def: $edi -; 686-O0-NEXT: movw %si, (%edi) +; 686-O0-NEXT: movzbl var_7, %eax +; 686-O0-NEXT: # 
kill: def $ax killed $ax killed $eax +; 686-O0-NEXT: cmpw $0, %ax +; 686-O0-NEXT: setne %al +; 686-O0-NEXT: xorb $-1, %al +; 686-O0-NEXT: andb $1, %al +; 686-O0-NEXT: movzbl %al, %eax +; 686-O0-NEXT: movzbl var_7, %ecx +; 686-O0-NEXT: cmpl %ecx, %eax +; 686-O0-NEXT: sete %al +; 686-O0-NEXT: andb $1, %al +; 686-O0-NEXT: movzbl %al, %eax +; 686-O0-NEXT: # kill: def $ax killed $ax killed $eax +; 686-O0-NEXT: # implicit-def: $ecx +; 686-O0-NEXT: movw %ax, (%ecx) ; 686-O0-NEXT: addl $2, %esp -; 686-O0-NEXT: .cfi_def_cfa_offset 12 -; 686-O0-NEXT: popl %esi -; 686-O0-NEXT: .cfi_def_cfa_offset 8 -; 686-O0-NEXT: popl %edi ; 686-O0-NEXT: .cfi_def_cfa_offset 4 ; 686-O0-NEXT: retl ; @@ -446,35 +431,35 @@ ; X86-O0-NEXT: movl var_13, %eax ; X86-O0-NEXT: xorl $-1, %eax ; X86-O0-NEXT: movl %eax, %eax -; X86-O0-NEXT: movl %eax, %ecx +; X86-O0-NEXT: # kill: def $rax killed $eax ; X86-O0-NEXT: cmpl $0, var_13 -; X86-O0-NEXT: setne %dl -; X86-O0-NEXT: xorb $-1, %dl -; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %eax -; X86-O0-NEXT: movl %eax, %esi -; X86-O0-NEXT: movl var_13, %eax -; X86-O0-NEXT: xorl $-1, %eax -; X86-O0-NEXT: xorl var_16, %eax -; X86-O0-NEXT: movl %eax, %eax -; X86-O0-NEXT: movl %eax, %edi -; X86-O0-NEXT: andq %rdi, %rsi -; X86-O0-NEXT: orq %rsi, %rcx -; X86-O0-NEXT: movq %rcx, -{{[0-9]+}}(%rsp) +; X86-O0-NEXT: setne %cl +; X86-O0-NEXT: xorb $-1, %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %ecx +; X86-O0-NEXT: # kill: def $rcx killed $ecx +; X86-O0-NEXT: movl var_13, %edx +; X86-O0-NEXT: xorl $-1, %edx +; X86-O0-NEXT: xorl var_16, %edx +; X86-O0-NEXT: movl %edx, %edx +; X86-O0-NEXT: # kill: def $rdx killed $edx +; X86-O0-NEXT: andq %rdx, %rcx +; X86-O0-NEXT: orq %rcx, %rax +; X86-O0-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; X86-O0-NEXT: movl var_13, %eax ; X86-O0-NEXT: xorl $-1, %eax ; X86-O0-NEXT: movl %eax, %eax -; X86-O0-NEXT: movl %eax, %ecx +; X86-O0-NEXT: # kill: def $rax killed $eax ; X86-O0-NEXT: cmpl $0, var_13 -; X86-O0-NEXT: setne %dl 
-; X86-O0-NEXT: xorb $-1, %dl -; X86-O0-NEXT: andb $1, %dl -; X86-O0-NEXT: movzbl %dl, %eax -; X86-O0-NEXT: movl %eax, %esi -; X86-O0-NEXT: andq $0, %rsi -; X86-O0-NEXT: orq %rsi, %rcx -; X86-O0-NEXT: # kill: def $ecx killed $ecx killed $rcx -; X86-O0-NEXT: movl %ecx, var_46 +; X86-O0-NEXT: setne %cl +; X86-O0-NEXT: xorb $-1, %cl +; X86-O0-NEXT: andb $1, %cl +; X86-O0-NEXT: movzbl %cl, %ecx +; X86-O0-NEXT: # kill: def $rcx killed $ecx +; X86-O0-NEXT: andq $0, %rcx +; X86-O0-NEXT: orq %rcx, %rax +; X86-O0-NEXT: # kill: def $eax killed $eax killed $rax +; X86-O0-NEXT: movl %eax, var_46 ; X86-O0-NEXT: retq ; ; X64-LABEL: f3: @@ -499,31 +484,28 @@ ; 686-O0-NEXT: .cfi_offset %ebp, -8 ; 686-O0-NEXT: movl %esp, %ebp ; 686-O0-NEXT: .cfi_def_cfa_register %ebp -; 686-O0-NEXT: pushl %edi ; 686-O0-NEXT: pushl %esi ; 686-O0-NEXT: andl $-8, %esp -; 686-O0-NEXT: subl $8, %esp -; 686-O0-NEXT: .cfi_offset %esi, -16 -; 686-O0-NEXT: .cfi_offset %edi, -12 +; 686-O0-NEXT: subl $16, %esp +; 686-O0-NEXT: .cfi_offset %esi, -12 ; 686-O0-NEXT: movl var_13, %eax ; 686-O0-NEXT: movl %eax, %ecx ; 686-O0-NEXT: notl %ecx ; 686-O0-NEXT: testl %eax, %eax -; 686-O0-NEXT: sete %dl -; 686-O0-NEXT: movzbl %dl, %eax -; 686-O0-NEXT: movl var_16, %esi -; 686-O0-NEXT: movl %ecx, %edi -; 686-O0-NEXT: xorl %esi, %edi -; 686-O0-NEXT: andl %edi, %eax +; 686-O0-NEXT: sete %al +; 686-O0-NEXT: movzbl %al, %eax +; 686-O0-NEXT: movl var_16, %edx +; 686-O0-NEXT: movl %ecx, %esi +; 686-O0-NEXT: xorl %edx, %esi +; 686-O0-NEXT: andl %esi, %eax ; 686-O0-NEXT: orl %eax, %ecx ; 686-O0-NEXT: movl %ecx, (%esp) ; 686-O0-NEXT: movl $0, {{[0-9]+}}(%esp) ; 686-O0-NEXT: movl var_13, %eax ; 686-O0-NEXT: notl %eax ; 686-O0-NEXT: movl %eax, var_46 -; 686-O0-NEXT: leal -8(%ebp), %esp +; 686-O0-NEXT: leal -4(%ebp), %esp ; 686-O0-NEXT: popl %esi -; 686-O0-NEXT: popl %edi ; 686-O0-NEXT: popl %ebp ; 686-O0-NEXT: .cfi_def_cfa %esp, 4 ; 686-O0-NEXT: retl Index: llvm/test/CodeGen/X86/pr32340.ll 
=================================================================== --- llvm/test/CodeGen/X86/pr32340.ll +++ llvm/test/CodeGen/X86/pr32340.ll @@ -14,37 +14,37 @@ ; X64-LABEL: foo: ; X64: # %bb.0: # %entry ; X64-NEXT: xorl %eax, %eax -; X64-NEXT: movl %eax, %ecx +; X64-NEXT: # kill: def $rax killed $eax ; X64-NEXT: movw $0, var_825 -; X64-NEXT: movzwl var_32, %eax +; X64-NEXT: movzwl var_32, %ecx ; X64-NEXT: movzwl var_901, %edx -; X64-NEXT: movl %eax, %esi +; X64-NEXT: movl %ecx, %esi ; X64-NEXT: xorl %edx, %esi -; X64-NEXT: movl %eax, %edx +; X64-NEXT: movl %ecx, %edx ; X64-NEXT: xorl %esi, %edx -; X64-NEXT: addl %eax, %edx -; X64-NEXT: movslq %edx, %rdi -; X64-NEXT: movq %rdi, var_826 -; X64-NEXT: movzwl var_32, %eax -; X64-NEXT: movl %eax, %edi -; X64-NEXT: movzwl var_901, %eax -; X64-NEXT: xorl $51981, %eax # imm = 0xCB0D -; X64-NEXT: movslq %eax, %r8 -; X64-NEXT: movabsq $-1142377792914660288, %r9 # imm = 0xF02575732E06E440 -; X64-NEXT: xorq %r9, %r8 -; X64-NEXT: movq %rdi, %r9 -; X64-NEXT: xorq %r8, %r9 -; X64-NEXT: xorq $-1, %r9 -; X64-NEXT: xorq %r9, %rdi -; X64-NEXT: movq %rdi, %r8 -; X64-NEXT: orq var_57, %r8 -; X64-NEXT: orq %r8, %rdi -; X64-NEXT: # kill: def $di killed $di killed $rdi -; X64-NEXT: movw %di, var_900 -; X64-NEXT: cmpq var_28, %rcx -; X64-NEXT: setne %r10b -; X64-NEXT: andb $1, %r10b -; X64-NEXT: movzbl %r10b, %eax +; X64-NEXT: addl %ecx, %edx +; X64-NEXT: movslq %edx, %rcx +; X64-NEXT: movq %rcx, var_826 +; X64-NEXT: movzwl var_32, %ecx +; X64-NEXT: # kill: def $rcx killed $ecx +; X64-NEXT: movzwl var_901, %edx +; X64-NEXT: xorl $51981, %edx # imm = 0xCB0D +; X64-NEXT: movslq %edx, %rdx +; X64-NEXT: movabsq $-1142377792914660288, %rsi # imm = 0xF02575732E06E440 +; X64-NEXT: xorq %rsi, %rdx +; X64-NEXT: movq %rcx, %rsi +; X64-NEXT: xorq %rdx, %rsi +; X64-NEXT: xorq $-1, %rsi +; X64-NEXT: xorq %rsi, %rcx +; X64-NEXT: movq %rcx, %rdx +; X64-NEXT: orq var_57, %rdx +; X64-NEXT: orq %rdx, %rcx +; X64-NEXT: # kill: def $cx killed $cx killed $rcx 
+; X64-NEXT: movw %cx, var_900 +; X64-NEXT: cmpq var_28, %rax +; X64-NEXT: setne %al +; X64-NEXT: andb $1, %al +; X64-NEXT: movzbl %al, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: movw %ax, var_827 ; X64-NEXT: retq Index: llvm/test/CodeGen/X86/pr32345.ll =================================================================== --- llvm/test/CodeGen/X86/pr32345.ll +++ llvm/test/CodeGen/X86/pr32345.ll @@ -15,23 +15,23 @@ ; X640-NEXT: xorl %ecx, %eax ; X640-NEXT: movzwl var_27, %ecx ; X640-NEXT: xorl %ecx, %eax -; X640-NEXT: movslq %eax, %rdx -; X640-NEXT: movq %rdx, -{{[0-9]+}}(%rsp) +; X640-NEXT: cltq +; X640-NEXT: movq %rax, -{{[0-9]+}}(%rsp) ; X640-NEXT: movzwl var_22, %eax ; X640-NEXT: movzwl var_27, %ecx ; X640-NEXT: xorl %ecx, %eax ; X640-NEXT: movzwl var_27, %ecx ; X640-NEXT: xorl %ecx, %eax -; X640-NEXT: movslq %eax, %rdx -; X640-NEXT: movzwl var_27, %eax -; X640-NEXT: subl $16610, %eax # imm = 0x40E2 -; X640-NEXT: movl %eax, %eax -; X640-NEXT: movl %eax, %ecx +; X640-NEXT: cltq +; X640-NEXT: movzwl var_27, %ecx +; X640-NEXT: subl $16610, %ecx # imm = 0x40E2 +; X640-NEXT: movl %ecx, %ecx +; X640-NEXT: # kill: def $rcx killed $ecx ; X640-NEXT: # kill: def $cl killed $rcx -; X640-NEXT: sarq %cl, %rdx -; X640-NEXT: # kill: def $dl killed $dl killed $rdx -; X640-NEXT: # implicit-def: $rsi -; X640-NEXT: movb %dl, (%rsi) +; X640-NEXT: sarq %cl, %rax +; X640-NEXT: # kill: def $al killed $al killed $rax +; X640-NEXT: # implicit-def: $rcx +; X640-NEXT: movb %al, (%rcx) ; X640-NEXT: retq ; ; 6860-LABEL: foo: @@ -41,43 +41,37 @@ ; 6860-NEXT: .cfi_offset %ebp, -8 ; 6860-NEXT: movl %esp, %ebp ; 6860-NEXT: .cfi_def_cfa_register %ebp -; 6860-NEXT: pushl %ebx -; 6860-NEXT: pushl %edi -; 6860-NEXT: pushl %esi ; 6860-NEXT: andl $-8, %esp -; 6860-NEXT: subl $32, %esp -; 6860-NEXT: .cfi_offset %esi, -20 -; 6860-NEXT: .cfi_offset %edi, -16 -; 6860-NEXT: .cfi_offset %ebx, -12 +; 6860-NEXT: subl $24, %esp ; 6860-NEXT: movw var_22, %ax ; 6860-NEXT: movzwl var_27, 
%ecx ; 6860-NEXT: movw %cx, %dx ; 6860-NEXT: xorw %dx, %ax -; 6860-NEXT: # implicit-def: $esi -; 6860-NEXT: movw %ax, %si -; 6860-NEXT: xorl %ecx, %esi -; 6860-NEXT: # kill: def $si killed $si killed $esi -; 6860-NEXT: movzwl %si, %ecx -; 6860-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; 6860-NEXT: # implicit-def: $edx +; 6860-NEXT: movw %ax, %dx +; 6860-NEXT: xorl %ecx, %edx +; 6860-NEXT: # kill: def $dx killed $dx killed $edx +; 6860-NEXT: movzwl %dx, %eax +; 6860-NEXT: movl %eax, {{[0-9]+}}(%esp) ; 6860-NEXT: movl $0, {{[0-9]+}}(%esp) ; 6860-NEXT: movw var_22, %ax ; 6860-NEXT: movzwl var_27, %ecx ; 6860-NEXT: movw %cx, %dx ; 6860-NEXT: xorw %dx, %ax -; 6860-NEXT: # implicit-def: $edi -; 6860-NEXT: movw %ax, %di -; 6860-NEXT: xorl %ecx, %edi -; 6860-NEXT: # kill: def $di killed $di killed $edi -; 6860-NEXT: movzwl %di, %ebx +; 6860-NEXT: # implicit-def: $edx +; 6860-NEXT: movw %ax, %dx +; 6860-NEXT: xorl %ecx, %edx +; 6860-NEXT: # kill: def $dx killed $dx killed $edx +; 6860-NEXT: movzwl %dx, %eax ; 6860-NEXT: # kill: def $cl killed $cl killed $ecx ; 6860-NEXT: addb $30, %cl -; 6860-NEXT: xorl %eax, %eax +; 6860-NEXT: xorl %edx, %edx ; 6860-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; 6860-NEXT: shrdl %cl, %eax, %ebx +; 6860-NEXT: shrdl %cl, %edx, %eax ; 6860-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Reload ; 6860-NEXT: testb $32, %cl -; 6860-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; 6860-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; 6860-NEXT: jne .LBB0_2 ; 6860-NEXT: # %bb.1: # %bb ; 6860-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload @@ -87,10 +81,7 @@ ; 6860-NEXT: # kill: def $al killed $al killed $eax ; 6860-NEXT: # implicit-def: $ecx ; 6860-NEXT: movb %al, (%ecx) -; 6860-NEXT: leal -12(%ebp), %esp -; 6860-NEXT: popl %esi -; 6860-NEXT: popl %edi -; 6860-NEXT: popl %ebx +; 6860-NEXT: movl %ebp, %esp ; 6860-NEXT: popl %ebp ; 6860-NEXT: 
.cfi_def_cfa %esp, 4 ; 6860-NEXT: retl Index: llvm/test/CodeGen/X86/pr32451.ll =================================================================== --- llvm/test/CodeGen/X86/pr32451.ll +++ llvm/test/CodeGen/X86/pr32451.ll @@ -9,29 +9,24 @@ define i8** @japi1_convert_690(i8**, i8***, i32) { ; CHECK-LABEL: japi1_convert_690: ; CHECK: # %bb.0: # %top -; CHECK-NEXT: pushl %ebx -; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: subl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: .cfi_offset %ebx, -8 +; CHECK-NEXT: .cfi_def_cfa_offset 20 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll julia.gc_root_decl -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll jl_get_ptls_states -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: movl 4(%ecx), %edx -; CHECK-NEXT: movb (%edx), %bl -; CHECK-NEXT: andb $1, %bl -; CHECK-NEXT: movzbl %bl, %edx +; CHECK-NEXT: movb (%edx), %dl +; CHECK-NEXT: andb $1, %dl +; CHECK-NEXT: movzbl %dl, %edx ; CHECK-NEXT: movl %edx, (%esp) -; CHECK-NEXT: movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill +; CHECK-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; CHECK-NEXT: calll jl_box_int32 -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx # 4-byte Reload +; CHECK-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; CHECK-NEXT: movl %eax, (%ecx) ; CHECK-NEXT: addl $16, %esp -; CHECK-NEXT: .cfi_def_cfa_offset 8 -; CHECK-NEXT: popl %ebx ; CHECK-NEXT: .cfi_def_cfa_offset 4 ; CHECK-NEXT: retl top: Index: llvm/test/CodeGen/X86/pr34592.ll =================================================================== --- llvm/test/CodeGen/X86/pr34592.ll +++ llvm/test/CodeGen/X86/pr34592.ll @@ -10,7 +10,7 @@ ; CHECK-NEXT: movq %rsp, %rbp ; 
CHECK-NEXT: .cfi_def_cfa_register %rbp ; CHECK-NEXT: andq $-32, %rsp -; CHECK-NEXT: subq $192, %rsp +; CHECK-NEXT: subq $160, %rsp ; CHECK-NEXT: vmovaps 240(%rbp), %ymm8 ; CHECK-NEXT: vmovaps 208(%rbp), %ymm9 ; CHECK-NEXT: vmovaps 176(%rbp), %ymm10 @@ -27,14 +27,14 @@ ; CHECK-NEXT: vpalignr {{.*#+}} ymm2 = ymm2[8,9,10,11,12,13,14,15],ymm11[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm11[16,17,18,19,20,21,22,23] ; CHECK-NEXT: vpermq {{.*#+}} ymm2 = ymm2[2,3,2,0] ; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4,5],ymm2[6,7] -; CHECK-NEXT: vmovaps %xmm7, %xmm9 -; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7] -; CHECK-NEXT: # implicit-def: $ymm2 -; CHECK-NEXT: vmovaps %xmm9, %xmm2 -; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm11 # 32-byte Reload -; CHECK-NEXT: vpalignr {{.*#+}} ymm9 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] -; CHECK-NEXT: vpermq {{.*#+}} ymm9 = ymm9[0,1,0,3] -; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm9[4,5,6,7] +; CHECK-NEXT: vmovaps %xmm7, %xmm2 +; CHECK-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6,7] +; CHECK-NEXT: # implicit-def: $ymm9 +; CHECK-NEXT: vmovaps %xmm2, %xmm9 +; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm2 # 32-byte Reload +; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm2[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23] +; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3] +; CHECK-NEXT: vpblendd {{.*#+}} ymm9 = ymm9[0,1,2,3],ymm11[4,5,6,7] ; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7] ; CHECK-NEXT: vpermq {{.*#+}} ymm8 = ymm8[2,1,1,3] ; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5] @@ -43,14 +43,11 @@ ; CHECK-NEXT: vmovq {{.*#+}} xmm7 = xmm7[0],zero ; CHECK-NEXT: # implicit-def: $ymm8 ; CHECK-NEXT: vmovaps %xmm7, %xmm8 -; CHECK-NEXT: 
vperm2i128 {{.*#+}} ymm6 = ymm8[0,1],ymm6[0,1] +; CHECK-NEXT: vperm2i128 {{.*#+}} ymm2 = ymm8[0,1],ymm6[0,1] ; CHECK-NEXT: vmovaps %ymm1, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill ; CHECK-NEXT: vmovaps %ymm5, %ymm1 -; CHECK-NEXT: vmovaps %ymm2, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill -; CHECK-NEXT: vmovaps %ymm6, %ymm2 -; CHECK-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %ymm5 # 32-byte Reload ; CHECK-NEXT: vmovaps %ymm3, (%rsp) # 32-byte Spill -; CHECK-NEXT: vmovaps %ymm5, %ymm3 +; CHECK-NEXT: vmovaps %ymm9, %ymm3 ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 Index: llvm/test/CodeGen/X86/pr39733.ll =================================================================== --- llvm/test/CodeGen/X86/pr39733.ll +++ llvm/test/CodeGen/X86/pr39733.ll @@ -23,8 +23,8 @@ ; CHECK-NEXT: vmovaps %xmm1, %xmm2 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] ; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0 -; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm2 -; CHECK-NEXT: vmovdqa %ymm2, (%rsp) +; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 +; CHECK-NEXT: vmovdqa %ymm0, (%rsp) ; CHECK-NEXT: movq %rbp, %rsp ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa %rsp, 8 Index: llvm/test/CodeGen/X86/pr44749.ll =================================================================== --- llvm/test/CodeGen/X86/pr44749.ll +++ llvm/test/CodeGen/X86/pr44749.ll @@ -14,20 +14,22 @@ ; CHECK-NEXT: movsd %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: callq _b ; CHECK-NEXT: cvtsi2sd %eax, %xmm0 -; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rcx -; CHECK-NEXT: subq $-1, %rcx -; CHECK-NEXT: setne %dl -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi +; CHECK-NEXT: movq _calloc@{{.*}}(%rip), %rax +; CHECK-NEXT: subq $-1, %rax +; CHECK-NEXT: setne %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: ## kill: def $rcx killed $ecx +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx ; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero ; 
CHECK-NEXT: ucomisd %xmm1, %xmm0 -; CHECK-NEXT: setae %dl -; CHECK-NEXT: movzbl %dl, %eax -; CHECK-NEXT: movl %eax, %esi -; CHECK-NEXT: leaq {{.*}}(%rip), %rdi +; CHECK-NEXT: setae %cl +; CHECK-NEXT: movzbl %cl, %ecx +; CHECK-NEXT: ## kill: def $rcx killed $ecx +; CHECK-NEXT: leaq {{.*}}(%rip), %rdx ; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: cvttsd2si %xmm0, %eax +; CHECK-NEXT: cvttsd2si %xmm0, %ecx +; CHECK-NEXT: movq %rax, (%rsp) ## 8-byte Spill +; CHECK-NEXT: movl %ecx, %eax ; CHECK-NEXT: addq $24, %rsp ; CHECK-NEXT: retq entry: Index: llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir =================================================================== --- llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir +++ llvm/test/CodeGen/X86/regalloc-fast-missing-live-out-spill.mir @@ -23,15 +23,15 @@ ; CHECK: successors: %bb.3(0x80000000) ; CHECK: $rax = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) ; CHECK: renamable $ecx = MOV32r0 implicit-def $eflags - ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit + ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit ; CHECK: MOV64mi32 killed renamable $rax, 1, $noreg, 0, $noreg, 0 :: (volatile store 8) - ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.0) + ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, killed $rcx :: (store 8 into %stack.0) ; CHECK: bb.3: ; CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) ; CHECK: $rax = MOV64rm %stack.0, 1, $noreg, 0, $noreg :: (load 8 from %stack.0) ; CHECK: renamable $ecx = MOV32r0 implicit-def dead $eflags - ; CHECK: renamable $rdx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit - ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed $rdx :: (store 8 into %stack.1) + ; CHECK: renamable $rcx = SUBREG_TO_REG 0, killed renamable $ecx, %subreg.sub_32bit + ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, killed 
$rcx :: (store 8 into %stack.1) ; CHECK: JMP64r killed renamable $rax bb.0: liveins: $edi, $rsi Index: llvm/test/CodeGen/X86/swift-return.ll =================================================================== --- llvm/test/CodeGen/X86/swift-return.ll +++ llvm/test/CodeGen/X86/swift-return.ll @@ -28,11 +28,10 @@ ; CHECK-O0-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: callq gen -; CHECK-O0-NEXT: movswl %ax, %ecx -; CHECK-O0-NEXT: movsbl %dl, %esi -; CHECK-O0-NEXT: addl %esi, %ecx -; CHECK-O0-NEXT: # kill: def $cx killed $cx killed $ecx -; CHECK-O0-NEXT: movw %cx, %ax +; CHECK-O0-NEXT: cwtl +; CHECK-O0-NEXT: movsbl %dl, %ecx +; CHECK-O0-NEXT: addl %ecx, %eax +; CHECK-O0-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-O0-NEXT: popq %rcx ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -80,16 +79,16 @@ ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi ; CHECK-O0-NEXT: movq %rsp, %rax ; CHECK-O0-NEXT: callq gen2 +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %eax ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edx -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %esi -; CHECK-O0-NEXT: movl (%rsp), %edi -; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %r8d -; CHECK-O0-NEXT: addl %r8d, %edi -; CHECK-O0-NEXT: addl %esi, %edi -; CHECK-O0-NEXT: addl %edx, %edi -; CHECK-O0-NEXT: addl %ecx, %edi -; CHECK-O0-NEXT: movl %edi, %eax +; CHECK-O0-NEXT: movl (%rsp), %esi +; CHECK-O0-NEXT: movl {{[0-9]+}}(%rsp), %edi +; CHECK-O0-NEXT: addl %edi, %esi +; CHECK-O0-NEXT: addl %edx, %esi +; CHECK-O0-NEXT: addl %ecx, %esi +; CHECK-O0-NEXT: addl %eax, %esi +; CHECK-O0-NEXT: movl %esi, %eax ; CHECK-O0-NEXT: addq $24, %rsp ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq @@ -264,17 +263,17 @@ ; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-NEXT: callq produce_i1_ret ; CHECK-O0-NEXT: andb $1, %al -; CHECK-O0-NEXT: movzbl %al, %esi -; CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %al, %eax 
+; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: andb $1, %dl -; CHECK-O0-NEXT: movzbl %dl, %esi -; CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %dl, %eax +; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: andb $1, %cl -; CHECK-O0-NEXT: movzbl %cl, %esi -; CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %cl, %eax +; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: andb $1, %r8b -; CHECK-O0-NEXT: movzbl %r8b, %esi -; CHECK-O0-NEXT: movl %esi, var +; CHECK-O0-NEXT: movzbl %r8b, %eax +; CHECK-O0-NEXT: movl %eax, var ; CHECK-O0-NEXT: popq %rax ; CHECK-O0-NEXT: .cfi_def_cfa_offset 8 ; CHECK-O0-NEXT: retq Index: llvm/test/CodeGen/X86/swifterror.ll =================================================================== --- llvm/test/CodeGen/X86/swifterror.ll +++ llvm/test/CodeGen/X86/swifterror.ll @@ -790,8 +790,8 @@ ; CHECK-O0-LABEL: testAssign4 ; CHECK-O0: callq _foo2 ; CHECK-O0: xorl %eax, %eax -; CHECK-O0: movl %eax, %ecx -; CHECK-O0: movq %rcx, [[SLOT:[-a-z0-9\(\)\%]*]] +; CHECK-O0: ## kill: def $rax killed $eax +; CHECK-O0: movq %rax, [[SLOT:[-a-z0-9\(\)\%]*]] ; CHECK-O0: movq [[SLOT]], %rax ; CHECK-O0: movq %rax, [[SLOT2:[-a-z0-9\(\)\%]*]] ; CHECK-O0: movq [[SLOT2]], %r12 Index: llvm/test/DebugInfo/X86/op_deref.ll =================================================================== --- llvm/test/DebugInfo/X86/op_deref.ll +++ llvm/test/DebugInfo/X86/op_deref.ll @@ -6,10 +6,10 @@ ; RUN: | FileCheck %s -check-prefix=CHECK -check-prefix=DWARF3 ; DWARF4: DW_AT_location [DW_FORM_sec_offset] (0x00000000 -; DWARF4-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref +; DWARF4-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref ; DWARF3: DW_AT_location [DW_FORM_data4] (0x00000000 -; DWARF3-NEXT: {{.*}}: DW_OP_breg1 RDX+0, DW_OP_deref +; DWARF3-NEXT: {{.*}}: DW_OP_breg2 RCX+0, DW_OP_deref ; CHECK-NOT: DW_TAG ; CHECK: DW_AT_name [DW_FORM_strp] ( .debug_str[0x00000067] = "vla") @@ -17,8 +17,8 @@ ; Check the DEBUG_VALUE comments for good measure. 
; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - -filetype=asm | FileCheck %s -check-prefix=ASM-CHECK ; vla should have a register-indirect address at one point. -; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rdx+0] -; ASM-CHECK: DW_OP_breg1 +; ASM-CHECK: DEBUG_VALUE: vla <- [DW_OP_deref] [$rcx+0] +; ASM-CHECK: DW_OP_breg2 ; RUN: llvm-as %s -o - | llvm-dis - | FileCheck %s --check-prefix=PRETTY-PRINT ; PRETTY-PRINT: DIExpression(DW_OP_deref)