diff --git a/llvm/lib/CodeGen/RegAllocGreedy.h b/llvm/lib/CodeGen/RegAllocGreedy.h --- a/llvm/lib/CodeGen/RegAllocGreedy.h +++ b/llvm/lib/CodeGen/RegAllocGreedy.h @@ -348,6 +348,12 @@ const SmallVirtRegSet &); MCRegister tryRegionSplit(const LiveInterval &, AllocationOrder &, SmallVectorImpl &); + /// Calculate cost of region splitting around the specified register. + unsigned calculateRegionSplitCostAroundReg(MCPhysReg PhysReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, + unsigned &BestCand); /// Calculate cost of region splitting. unsigned calculateRegionSplitCost(const LiveInterval &VirtReg, AllocationOrder &Order, @@ -356,6 +362,10 @@ /// Perform region splitting. unsigned doRegionSplit(const LiveInterval &VirtReg, unsigned BestCand, bool HasCompact, SmallVectorImpl &NewVRegs); + /// Try to split VirtReg around physical Hint register. + bool trySplitAroundHintReg(MCPhysReg Hint, const LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs, + AllocationOrder &Order); /// Check other options before using a callee-saved register for the first /// time. MCRegister tryAssignCSRFirstTime(const LiveInterval &VirtReg, diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -141,6 +141,12 @@ "shorter local live ranges will tend to be allocated first"), cl::Hidden); +static cl::opt SplitThresholdForRegWithHint( + "split-threshold-for-reg-with-hint", + cl::desc("The threshold for splitting a virtual register with a hint, in " + "percentate"), + cl::init(75), cl::Hidden); + static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", createGreedyRegisterAllocator); @@ -422,6 +428,11 @@ evictInterference(VirtReg, PhysHint, NewVRegs); return PhysHint; } + + // We can also split the virtual register in cold blocks. + if (trySplitAroundHintReg(PhysHint, VirtReg, NewVRegs, Order)) + return 0; + // Record the missed hint, we may be able to recover // at the end if the surrounding allocation changed. SetOfBrokenHints.insert(&VirtReg); @@ -1064,86 +1075,98 @@ return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); } -unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg, +unsigned +RAGreedy::calculateRegionSplitCostAroundReg(MCPhysReg PhysReg, AllocationOrder &Order, BlockFrequency &BestCost, unsigned &NumCands, - bool IgnoreCSR) { - unsigned BestCand = NoCand; - for (MCPhysReg PhysReg : Order) { - assert(PhysReg); - if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg)) - continue; - - // Discard bad candidates before we run out of interference cache cursors. - // This will only affect register classes with a lot of registers (>32). - if (NumCands == IntfCache.getMaxCursors()) { - unsigned WorstCount = ~0u; - unsigned Worst = 0; - for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) { - if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg) - continue; - unsigned Count = GlobalCand[CandIndex].LiveBundles.count(); - if (Count < WorstCount) { - Worst = CandIndex; - WorstCount = Count; - } + unsigned &BestCand) { + // Discard bad candidates before we run out of interference cache cursors. + // This will only affect register classes with a lot of registers (>32). + if (NumCands == IntfCache.getMaxCursors()) { + unsigned WorstCount = ~0u; + unsigned Worst = 0; + for (unsigned CandIndex = 0; CandIndex != NumCands; ++CandIndex) { + if (CandIndex == BestCand || !GlobalCand[CandIndex].PhysReg) + continue; + unsigned Count = GlobalCand[CandIndex].LiveBundles.count(); + if (Count < WorstCount) { + Worst = CandIndex; + WorstCount = Count; } - --NumCands; - GlobalCand[Worst] = GlobalCand[NumCands]; - if (BestCand == NumCands) - BestCand = Worst; } + --NumCands; + GlobalCand[Worst] = GlobalCand[NumCands]; + if (BestCand == NumCands) + BestCand = Worst; + } - if (GlobalCand.size() <= NumCands) - GlobalCand.resize(NumCands+1); - GlobalSplitCandidate &Cand = GlobalCand[NumCands]; - Cand.reset(IntfCache, PhysReg); + if (GlobalCand.size() <= NumCands) + GlobalCand.resize(NumCands+1); + GlobalSplitCandidate &Cand = GlobalCand[NumCands]; + Cand.reset(IntfCache, PhysReg); - SpillPlacer->prepare(Cand.LiveBundles); - BlockFrequency Cost; - if (!addSplitConstraints(Cand.Intf, Cost)) { - LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n"); - continue; - } - LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = "; - MBFI->printBlockFreq(dbgs(), Cost)); - if (Cost >= BestCost) { - LLVM_DEBUG({ - if (BestCand == NoCand) - dbgs() << " worse than no bundles\n"; - else - dbgs() << " worse than " - << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; - }); - continue; - } - if (!growRegion(Cand)) { - LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n"); - continue; - } + SpillPlacer->prepare(Cand.LiveBundles); + BlockFrequency Cost; + if (!addSplitConstraints(Cand.Intf, Cost)) { + LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tno positive bundles\n"); + return BestCand; + } + LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << "\tstatic = "; + MBFI->printBlockFreq(dbgs(), Cost)); + if (Cost >= BestCost) { + LLVM_DEBUG({ + if (BestCand == NoCand) + dbgs() << " worse than no bundles\n"; + else + dbgs() << " worse than " + << printReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; + }); + return BestCand; + } + if (!growRegion(Cand)) { + LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n"); + return BestCand; + } + + SpillPlacer->finish(); + + // No live bundles, defer to splitSingleBlocks(). + if (!Cand.LiveBundles.any()) { + LLVM_DEBUG(dbgs() << " no bundles.\n"); + return BestCand; + } + + Cost += calcGlobalSplitCost(Cand, Order); + LLVM_DEBUG({ + dbgs() << ", total = "; + MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; + for (int I : Cand.LiveBundles.set_bits()) + dbgs() << " EB#" << I; + dbgs() << ".\n"; + }); + if (Cost < BestCost) { + BestCand = NumCands; + BestCost = Cost; + } + ++NumCands; - SpillPlacer->finish(); + return BestCand; +} - // No live bundles, defer to splitSingleBlocks(). - if (!Cand.LiveBundles.any()) { - LLVM_DEBUG(dbgs() << " no bundles.\n"); +unsigned RAGreedy::calculateRegionSplitCost(const LiveInterval &VirtReg, + AllocationOrder &Order, + BlockFrequency &BestCost, + unsigned &NumCands, + bool IgnoreCSR) { + unsigned BestCand = NoCand; + for (MCPhysReg PhysReg : Order) { + assert(PhysReg); + if (IgnoreCSR && EvictAdvisor->isUnusedCalleeSavedReg(PhysReg)) continue; - } - Cost += calcGlobalSplitCost(Cand, Order); - LLVM_DEBUG({ - dbgs() << ", total = "; - MBFI->printBlockFreq(dbgs(), Cost) << " with bundles"; - for (int I : Cand.LiveBundles.set_bits()) - dbgs() << " EB#" << I; - dbgs() << ".\n"; - }); - if (Cost < BestCost) { - BestCand = NumCands; - BestCost = Cost; - } - ++NumCands; + calculateRegionSplitCostAroundReg(PhysReg, Order, BestCost, NumCands, + BestCand); } return BestCand; @@ -1189,6 +1212,53 @@ return 0; } +// VirtReg has a physical Hint, this function tries to split VirtReg around +// Hint if we can place new COPY instructions in cold blocks. +bool RAGreedy::trySplitAroundHintReg(MCPhysReg Hint, + const LiveInterval &VirtReg, + SmallVectorImpl &NewVRegs, + AllocationOrder &Order) { + BlockFrequency Cost = 0; + Register Reg = VirtReg.reg(); + + // Compute the cost of assigning a non Hint physical register to VirtReg. + // We define it as the total frequency of broken COPY instructions to/from + // Hint register, and after split, they can be deleted. + for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { + if (!TII->isFullCopyInstr(Instr)) + continue; + Register OtherReg = Instr.getOperand(1).getReg(); + if (OtherReg == Reg) { + OtherReg = Instr.getOperand(0).getReg(); + if (OtherReg == Reg) + continue; + // Check if VirtReg interferes with OtherReg after this COPY instruction. + if (VirtReg.liveAt(LIS->getInstructionIndex(Instr).getRegSlot())) + continue; + } + MCRegister OtherPhysReg = + OtherReg.isPhysical() ? OtherReg.asMCReg() : VRM->getPhys(OtherReg); + if (OtherPhysReg == Hint) + Cost += MBFI->getBlockFreq(Instr.getParent()); + } + + // Decrease the cost so it will be split in colder blocks. + BranchProbability Threshold(SplitThresholdForRegWithHint, 100); + Cost *= Threshold; + if (Cost == 0) + return false; + + unsigned NumCands = 0; + unsigned BestCand = NoCand; + SA->analyze(&VirtReg); + calculateRegionSplitCostAroundReg(Hint, Order, Cost, NumCands, BestCand); + if (BestCand == NoCand) + return false; + + doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); + return true; +} + //===----------------------------------------------------------------------===// // Per-Block Splitting //===----------------------------------------------------------------------===// @@ -2329,6 +2399,9 @@ } else return PhysReg; } + // Non emtpy NewVRegs means VirtReg has been split. + if (!NewVRegs.empty()) + return 0; LiveRangeStage Stage = ExtraInfo->getStage(VirtReg); LLVM_DEBUG(dbgs() << StageName[Stage] << " Cascade " diff --git a/llvm/test/CodeGen/AArch64/cgp-usubo.ll b/llvm/test/CodeGen/AArch64/cgp-usubo.ll --- a/llvm/test/CodeGen/AArch64/cgp-usubo.ll +++ b/llvm/test/CodeGen/AArch64/cgp-usubo.ll @@ -161,13 +161,13 @@ ; CHECK-NEXT: tbz w3, #0, .LBB8_3 ; CHECK-NEXT: // %bb.1: // %t ; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: mov x23, x0 +; CHECK-NEXT: mov x22, x0 ; CHECK-NEXT: mov x20, x2 ; CHECK-NEXT: cset w21, lo -; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: mov x23, x1 ; CHECK-NEXT: mov w0, w21 ; CHECK-NEXT: bl call -; CHECK-NEXT: subs x8, x23, x22 +; CHECK-NEXT: subs x8, x22, x23 ; CHECK-NEXT: b.hs .LBB8_3 ; CHECK-NEXT: // %bb.2: // %end ; CHECK-NEXT: mov w19, w21 diff --git a/llvm/test/CodeGen/AArch64/csr-split.ll b/llvm/test/CodeGen/AArch64/csr-split.ll --- a/llvm/test/CodeGen/AArch64/csr-split.ll +++ b/llvm/test/CodeGen/AArch64/csr-split.ll @@ -83,19 +83,18 @@ define dso_local signext i32 @test2(ptr %p1) local_unnamed_addr uwtable { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: cbz x0, .LBB1_3 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: adrp x8, a -; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: ldrsw x8, [x8, :lo12:a] ; CHECK-NEXT: cmp x8, x0 ; CHECK-NEXT: b.ne .LBB1_3 ; CHECK-NEXT: // %bb.2: // %if.then2 +; CHECK-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload @@ -104,34 +103,28 @@ ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: b callNonVoid ; CHECK-NEXT: .LBB1_3: // %return -; CHECK-NEXT: .cfi_restore_state ; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w19 -; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; ; CHECK-APPLE-LABEL: test2: ; CHECK-APPLE: ; %bb.0: ; %entry -; CHECK-APPLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill -; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 32 -; CHECK-APPLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill -; CHECK-APPLE-NEXT: .cfi_offset w30, -8 -; CHECK-APPLE-NEXT: .cfi_offset w29, -16 -; CHECK-APPLE-NEXT: .cfi_offset w19, -24 -; CHECK-APPLE-NEXT: .cfi_offset w20, -32 -; CHECK-APPLE-NEXT: .cfi_remember_state ; CHECK-APPLE-NEXT: cbz x0, LBB1_3 ; CHECK-APPLE-NEXT: ; %bb.1: ; %entry ; CHECK-APPLE-NEXT: Lloh2: ; CHECK-APPLE-NEXT: adrp x8, _a@PAGE -; CHECK-APPLE-NEXT: mov x19, x0 ; CHECK-APPLE-NEXT: Lloh3: ; CHECK-APPLE-NEXT: ldrsw x8, [x8, _a@PAGEOFF] ; CHECK-APPLE-NEXT: cmp x8, x0 ; CHECK-APPLE-NEXT: b.ne LBB1_3 ; CHECK-APPLE-NEXT: ; %bb.2: ; %if.then2 +; CHECK-APPLE-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 32 +; CHECK-APPLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-APPLE-NEXT: .cfi_offset w30, -8 +; CHECK-APPLE-NEXT: .cfi_offset w29, -16 +; CHECK-APPLE-NEXT: .cfi_offset w19, -24 +; CHECK-APPLE-NEXT: .cfi_offset w20, -32 +; CHECK-APPLE-NEXT: mov x19, x0 ; CHECK-APPLE-NEXT: bl _callVoid ; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: mov x0, x19 @@ -143,15 +136,7 @@ ; CHECK-APPLE-NEXT: .cfi_restore w20 ; CHECK-APPLE-NEXT: b _callNonVoid ; CHECK-APPLE-NEXT: LBB1_3: ; %return -; CHECK-APPLE-NEXT: .cfi_restore_state -; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: mov w0, wzr -; CHECK-APPLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload -; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 0 -; CHECK-APPLE-NEXT: .cfi_restore w30 -; CHECK-APPLE-NEXT: .cfi_restore w29 -; CHECK-APPLE-NEXT: .cfi_restore w19 -; CHECK-APPLE-NEXT: .cfi_restore w20 ; CHECK-APPLE-NEXT: ret ; CHECK-APPLE-NEXT: .loh AdrpLdr Lloh2, Lloh3 entry: diff --git a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll --- a/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll +++ b/llvm/test/CodeGen/AMDGPU/blender-no-live-segment-at-def-implicit-def.ll @@ -12,8 +12,7 @@ ; CHECK-NEXT: s_load_dwordx8 s[36:43], s[8:9], 0x0 ; CHECK-NEXT: s_add_u32 s0, s0, s17 ; CHECK-NEXT: s_addc_u32 s1, s1, 0 -; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9] -; CHECK-NEXT: s_mov_b32 s8, 0 +; CHECK-NEXT: s_mov_b32 s12, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_cmp_lg_u32 s40, 0 ; CHECK-NEXT: s_cbranch_scc1 .LBB0_8 @@ -22,51 +21,54 @@ ; CHECK-NEXT: s_cbranch_scc1 .LBB0_4 ; CHECK-NEXT: ; %bb.2: ; %if.else251.i.i ; CHECK-NEXT: s_cmp_lg_u32 s43, 0 -; CHECK-NEXT: s_mov_b32 s12, 0 -; CHECK-NEXT: s_cselect_b32 s8, -1, 0 -; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s8 +; CHECK-NEXT: s_mov_b32 s17, 0 +; CHECK-NEXT: s_cselect_b32 s12, -1, 0 +; CHECK-NEXT: s_and_b32 vcc_lo, exec_lo, s12 ; CHECK-NEXT: s_cbranch_vccz .LBB0_5 ; CHECK-NEXT: ; %bb.3: ; CHECK-NEXT: s_mov_b32 s36, 0 -; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s8 +; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12 ; CHECK-NEXT: s_cbranch_vccz .LBB0_6 ; CHECK-NEXT: s_branch .LBB0_7 ; CHECK-NEXT: .LBB0_4: -; CHECK-NEXT: s_mov_b32 s10, s8 -; CHECK-NEXT: s_mov_b32 s11, s8 -; CHECK-NEXT: s_mov_b32 s9, s8 -; CHECK-NEXT: s_mov_b64 s[38:39], s[10:11] -; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9] +; CHECK-NEXT: s_mov_b32 s14, s12 +; CHECK-NEXT: s_mov_b32 s15, s12 +; CHECK-NEXT: s_mov_b32 s13, s12 +; CHECK-NEXT: s_mov_b64 s[38:39], s[14:15] +; CHECK-NEXT: s_mov_b64 s[36:37], s[12:13] ; CHECK-NEXT: s_branch .LBB0_7 ; CHECK-NEXT: .LBB0_5: ; %if.then263.i.i -; CHECK-NEXT: v_cmp_lt_f32_e64 s8, s41, 0 +; CHECK-NEXT: v_cmp_lt_f32_e64 s12, s41, 0 ; CHECK-NEXT: s_mov_b32 s36, 1.0 -; CHECK-NEXT: s_mov_b32 s12, 0x7fc00000 +; CHECK-NEXT: s_mov_b32 s17, 0x7fc00000 ; CHECK-NEXT: s_mov_b32 s37, s36 ; CHECK-NEXT: s_mov_b32 s38, s36 ; CHECK-NEXT: s_mov_b32 s39, s36 -; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s8 +; CHECK-NEXT: s_andn2_b32 vcc_lo, exec_lo, s12 ; CHECK-NEXT: s_cbranch_vccnz .LBB0_7 ; CHECK-NEXT: .LBB0_6: ; %if.end273.i.i -; CHECK-NEXT: s_add_u32 s8, s34, 40 -; CHECK-NEXT: s_addc_u32 s9, s35, 0 +; CHECK-NEXT: s_add_u32 s12, s8, 40 +; CHECK-NEXT: s_addc_u32 s13, s9, 0 ; CHECK-NEXT: s_getpc_b64 s[18:19] ; CHECK-NEXT: s_add_u32 s18, s18, _Z3dotDv3_fS_@gotpcrel32@lo+4 ; CHECK-NEXT: s_addc_u32 s19, s19, _Z3dotDv3_fS_@gotpcrel32@hi+12 ; CHECK-NEXT: v_lshlrev_b32_e32 v2, 20, v2 ; CHECK-NEXT: s_load_dwordx2 s[18:19], s[18:19], 0x0 ; CHECK-NEXT: v_lshlrev_b32_e32 v3, 10, v1 -; CHECK-NEXT: v_add_f32_e64 v1, s12, s36 +; CHECK-NEXT: v_add_f32_e64 v1, s17, s36 +; CHECK-NEXT: s_mov_b64 s[34:35], s[8:9] +; CHECK-NEXT: s_mov_b64 s[8:9], s[12:13] ; CHECK-NEXT: s_mov_b32 s12, s14 -; CHECK-NEXT: s_mov_b32 s13, s15 -; CHECK-NEXT: s_mov_b32 s14, s16 ; CHECK-NEXT: v_or3_b32 v31, v0, v3, v2 ; CHECK-NEXT: v_mov_b32_e32 v0, v1 ; CHECK-NEXT: v_mov_b32_e32 v1, 0 ; CHECK-NEXT: v_mov_b32_e32 v2, 0 +; CHECK-NEXT: s_mov_b32 s13, s15 +; CHECK-NEXT: s_mov_b32 s14, s16 ; CHECK-NEXT: s_mov_b32 s36, 0 ; CHECK-NEXT: s_waitcnt lgkmcnt(0) ; CHECK-NEXT: s_swappc_b64 s[30:31], s[18:19] +; CHECK-NEXT: s_mov_b64 s[8:9], s[34:35] ; CHECK-NEXT: s_mov_b32 s37, s36 ; CHECK-NEXT: s_mov_b32 s38, s36 ; CHECK-NEXT: s_mov_b32 s39, s36 @@ -77,7 +79,7 @@ ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 ; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; CHECK-NEXT: .LBB0_8: ; %kernel_direct_lighting.exit -; CHECK-NEXT: s_load_dwordx2 s[4:5], s[34:35], 0x20 +; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x20 ; CHECK-NEXT: v_mov_b32_e32 v0, s36 ; CHECK-NEXT: v_mov_b32_e32 v4, 0 ; CHECK-NEXT: v_mov_b32_e32 v1, s37 diff --git a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir --- a/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir +++ b/llvm/test/CodeGen/AMDGPU/greedy-alloc-fail-sgpr1024-spill.mir @@ -67,7 +67,7 @@ ; CHECK-NEXT: $sgpr10_sgpr11 = COPY killed renamable $sgpr36_sgpr37 ; CHECK-NEXT: $sgpr12 = COPY killed renamable $sgpr42 ; CHECK-NEXT: $sgpr13 = COPY killed renamable $sgpr33 - ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr68_sgpr69, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3 + ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr68_sgpr69, 0, csr_amdgpu, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr8_sgpr9, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13, implicit $sgpr0_sgpr1_sgpr2_sgpr3 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY $exec, implicit-def $exec ; CHECK-NEXT: dead renamable $sgpr6_sgpr7 = IMPLICIT_DEF @@ -85,7 +85,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 ; CHECK-NEXT: renamable $sgpr6 = S_LSHL_B32 renamable $sgpr67, 1, implicit-def dead $scc - ; CHECK-NEXT: dead [[COPY]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[COPY]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec + ; CHECK-NEXT: dead [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32_:%[0-9]+]]:vreg_1024 = V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32 [[V_INDIRECT_REG_WRITE_GPR_IDX_B32_V32_]], 0, killed $sgpr6, 3, implicit-def $m0, implicit $m0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.1(0x40000000) @@ -114,7 +114,7 @@ ; CHECK-NEXT: renamable $sgpr87 = COPY renamable $sgpr44 ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr44 ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr44 - ; CHECK-NEXT: dead %18:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, implicit $exec + ; CHECK-NEXT: dead [[COPY:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99, implicit $exec ; CHECK-NEXT: $exec = S_XOR_B64_term $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.5, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.1 @@ -125,7 +125,7 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $exec = S_OR_B64 $exec, killed renamable $sgpr6_sgpr7, implicit-def $scc ; CHECK-NEXT: dead renamable $sgpr4 = S_LSHL_B32 killed renamable $sgpr66, 1, implicit-def dead $scc - ; CHECK-NEXT: dead %16:vreg_1024 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 + ; CHECK-NEXT: dead [[COPY1:%[0-9]+]]:vreg_1024 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll --- a/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll +++ b/llvm/test/CodeGen/AMDGPU/insert-delay-alu-bug.ll @@ -55,15 +55,14 @@ ; GFX11-NEXT: s_mov_b64 s[16:17], s[4:5] ; GFX11-NEXT: v_mov_b32_e32 v31, v0 ; GFX11-NEXT: s_load_b32 s24, s[16:17], 0x24 -; GFX11-NEXT: s_mov_b32 s18, s14 ; GFX11-NEXT: s_mov_b32 s12, s13 ; GFX11-NEXT: s_mov_b64 s[10:11], s[6:7] -; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-NEXT: s_mov_b64 s[6:7], s[2:3] +; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v31 ; GFX11-NEXT: s_mov_b64 s[4:5], s[0:1] ; GFX11-NEXT: s_mov_b32 s3, 0 ; GFX11-NEXT: s_mov_b32 s0, -1 -; GFX11-NEXT: s_mov_b32 s19, exec_lo +; GFX11-NEXT: s_mov_b32 s18, exec_lo ; GFX11-NEXT: s_mov_b32 s32, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: v_mul_lo_u32 v0, s24, v0 @@ -72,11 +71,11 @@ ; GFX11-NEXT: s_cbranch_execz .LBB2_13 ; GFX11-NEXT: ; %bb.1: ; %bb14 ; GFX11-NEXT: s_load_b128 s[20:23], s[16:17], 0x2c +; GFX11-NEXT: s_mov_b32 s19, 0 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_bitcmp1_b32 s21, 0 ; GFX11-NEXT: s_cselect_b32 s25, -1, 0 ; GFX11-NEXT: s_bitcmp0_b32 s21, 0 -; GFX11-NEXT: s_mov_b32 s21, 0 ; GFX11-NEXT: s_cbranch_scc0 .LBB2_3 ; GFX11-NEXT: ; %bb.2: ; %bb15 ; GFX11-NEXT: s_add_u32 s8, s16, 0x58 @@ -84,11 +83,13 @@ ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, f0@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12 -; GFX11-NEXT: s_mov_b32 s13, s18 +; GFX11-NEXT: s_mov_b32 s13, s14 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 +; GFX11-NEXT: s_mov_b32 s21, s14 ; GFX11-NEXT: s_mov_b32 s14, s15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] +; GFX11-NEXT: s_mov_b32 s14, s21 ; GFX11-NEXT: s_mov_b32 s1, -1 ; GFX11-NEXT: s_and_not1_b32 vcc_lo, exec_lo, s3 ; GFX11-NEXT: s_cbranch_vccz .LBB2_4 @@ -125,8 +126,8 @@ ; GFX11-NEXT: s_mul_i32 s2, s2, s20 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GFX11-NEXT: s_or_b32 s2, s24, s2 -; GFX11-NEXT: s_lshl_b64 s[22:23], s[2:3], 1 -; GFX11-NEXT: global_load_u16 v2, v1, s[22:23] +; GFX11-NEXT: s_lshl_b64 s[20:21], s[2:3], 1 +; GFX11-NEXT: global_load_u16 v2, v1, s[20:21] ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v2 ; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo @@ -165,13 +166,13 @@ ; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, s0 ; GFX11-NEXT: s_cbranch_vccz .LBB2_10 ; GFX11-NEXT: ; %bb.11: ; %Flow6 -; GFX11-NEXT: s_mov_b32 s21, -1 +; GFX11-NEXT: s_mov_b32 s19, -1 ; GFX11-NEXT: .LBB2_12: ; %Flow11 ; GFX11-NEXT: s_and_b32 s3, s1, exec_lo -; GFX11-NEXT: s_or_not1_b32 s0, s21, exec_lo +; GFX11-NEXT: s_or_not1_b32 s0, s19, exec_lo ; GFX11-NEXT: .LBB2_13: ; %Flow9 -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s19 -; GFX11-NEXT: s_and_saveexec_b32 s19, s0 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s18 +; GFX11-NEXT: s_and_saveexec_b32 s18, s0 ; GFX11-NEXT: s_cbranch_execz .LBB2_15 ; GFX11-NEXT: ; %bb.14: ; %bb43 ; GFX11-NEXT: s_add_u32 s8, s16, 0x58 @@ -179,14 +180,14 @@ ; GFX11-NEXT: s_getpc_b64 s[0:1] ; GFX11-NEXT: s_add_u32 s0, s0, f0@gotpcrel32@lo+4 ; GFX11-NEXT: s_addc_u32 s1, s1, f0@gotpcrel32@hi+12 -; GFX11-NEXT: s_mov_b32 s13, s18 +; GFX11-NEXT: s_mov_b32 s13, s14 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 ; GFX11-NEXT: s_mov_b32 s14, s15 ; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_or_b32 s3, s3, exec_lo ; GFX11-NEXT: .LBB2_15: ; %Flow14 -; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s19 +; GFX11-NEXT: s_or_b32 exec_lo, exec_lo, s18 ; GFX11-NEXT: s_and_saveexec_b32 s0, s3 ; GFX11-NEXT: ; %bb.16: ; %UnifiedUnreachableBlock ; GFX11-NEXT: ; divergent unreachable diff --git a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir --- a/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir +++ b/llvm/test/CodeGen/AMDGPU/ran-out-of-sgprs-allocation-failure.mir @@ -40,91 +40,71 @@ ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $vgpr0, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9, $sgpr10_sgpr11 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr33 = COPY $sgpr14 - ; CHECK-NEXT: renamable $sgpr34_sgpr35 = COPY $sgpr4_sgpr5 - ; CHECK-NEXT: renamable $sgpr12_sgpr13 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: renamable $sgpr18_sgpr19 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: renamable $sgpr20_sgpr21 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: renamable $sgpr22_sgpr23 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: renamable $sgpr52 = S_MOV_B32 0 - ; CHECK-NEXT: renamable $sgpr24_sgpr25 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec - ; CHECK-NEXT: renamable $sgpr100_sgpr101 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: renamable $sgpr53 = S_MOV_B32 1083786240 - ; CHECK-NEXT: SI_SPILL_S1024_SAVE renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s1024) into %stack.1, align 4, addrspace 5) + ; CHECK-NEXT: renamable $sgpr34_sgpr35 = V_CMP_GT_I32_e64 1, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr36_sgpr37 = V_CMP_EQ_U32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr38_sgpr39 = V_CMP_NE_U32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr40_sgpr41 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr60 = S_MOV_B32 0 + ; CHECK-NEXT: renamable $sgpr42_sgpr43 = V_CMP_EQ_U32_e64 undef $sgpr4, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_1024_align2 = COPY renamable $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75, implicit $exec + ; CHECK-NEXT: renamable $sgpr44_sgpr45 = V_CMP_NE_U32_e64 1, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: renamable $sgpr61 = S_MOV_B32 1083786240 ; CHECK-NEXT: S_BRANCH %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.17(0x40000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr100_sgpr101, implicit-def dead $scc + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr44_sgpr45, implicit-def dead $scc ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_1024_align2 = COPY [[COPY]] ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.2, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.17 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2: ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.5(0x40000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr36 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr37 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr38 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr39 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr40 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr41 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr42 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr43 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr44 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr45 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr46 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr47 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr48 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr49 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr50 = COPY renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr51 = COPY killed renamable $sgpr68 - ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr52 = COPY killed renamable $sgpr84 - ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr53 = COPY killed renamable $sgpr72 - ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr54 = COPY killed renamable $sgpr72 - ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr55 = COPY killed renamable $sgpr72 - ; CHECK-NEXT: renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr56 = COPY killed renamable $sgpr72 - ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr57 = COPY killed renamable $sgpr76 - ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr58 = COPY killed renamable $sgpr76 - ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr59 = COPY killed renamable $sgpr76 - ; CHECK-NEXT: renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr76 - ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr61 = COPY killed renamable $sgpr80 - ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr62 = COPY killed renamable $sgpr80 - ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr63 = COPY killed renamable $sgpr80 - ; CHECK-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr64 = COPY killed renamable $sgpr80 - ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr65 = COPY killed renamable $sgpr84 - ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr66 = COPY killed renamable $sgpr84 - ; CHECK-NEXT: renamable $sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95_sgpr96_sgpr97_sgpr98_sgpr99 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: renamable $sgpr67 = COPY killed renamable $sgpr84 - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr36_sgpr37_sgpr38_sgpr39_sgpr40_sgpr41_sgpr42_sgpr43_sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67, implicit $exec + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $sgpr64 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr65 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr66 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr67 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr68 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr69 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr70 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr71 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr72 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr73 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr74 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr75 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr76 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr77 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr78 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr79 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr80 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr81 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr82 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr83 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr84 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr85 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr86 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr87 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr88 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr89 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr90 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr91 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr92 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr93 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr94 = COPY renamable $sgpr60 + ; CHECK-NEXT: renamable $sgpr95 = COPY renamable $sgpr60 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_1024_align2 = COPY killed renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $exec ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.5 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: successors: %bb.4(0x80000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr33 + ; CHECK-NEXT: renamable $sgpr60 = COPY killed renamable $sgpr14 ; CHECK-NEXT: renamable $sgpr62 = COPY killed renamable $sgpr15 ; CHECK-NEXT: SI_SPILL_S32_SAVE killed renamable $sgpr16, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5) ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 @@ -145,124 +125,117 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.5: ; CHECK-NEXT: successors: %bb.12(0x40000000), %bb.6(0x40000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr4_sgpr5 = S_AND_B64 renamable $sgpr20_sgpr21, undef renamable $sgpr88_sgpr89, implicit-def dead $scc - ; CHECK-NEXT: renamable $sgpr88_sgpr89 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec - ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr4_sgpr5 + ; CHECK-NEXT: renamable $sgpr12_sgpr13 = S_AND_B64 renamable $sgpr38_sgpr39, undef renamable $sgpr46_sgpr47, implicit-def dead $scc + ; CHECK-NEXT: renamable $sgpr46_sgpr47 = V_CMP_GT_I32_e64 0, undef %18:vgpr_32, implicit $exec + ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr12_sgpr13 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.12, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.6: ; CHECK-NEXT: successors: %bb.7(0x80000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr22_sgpr23, implicit $exec + ; CHECK-NEXT: dead [[V_CNDMASK_B32_e64_:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr40_sgpr41, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.7: ; CHECK-NEXT: successors: %bb.8(0x80000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr90_sgpr91 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: renamable $sgpr92_sgpr93 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec - ; CHECK-NEXT: dead [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr33, 11, implicit-def $m0, implicit $m0, implicit $exec + ; CHECK-NEXT: renamable $sgpr48_sgpr49 = nofpexcept V_CMP_NLT_F64_e64 0, undef $sgpr4_sgpr5, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: renamable $sgpr50_sgpr51 = nofpexcept V_CMP_NLT_F64_e64 0, 4607182418800017408, 0, undef %29:vreg_64_align2, 0, implicit $mode, implicit $exec + ; CHECK-NEXT: dead [[V_INDIRECT_REG_READ_GPR_IDX_B32_V32_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V32 [[COPY1]], undef $sgpr14, 11, implicit-def $m0, implicit $m0, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.8: ; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.9(0x40000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr90_sgpr91, $sgpr92_sgpr93, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr90_sgpr91, implicit-def dead $scc + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr48_sgpr49, implicit-def dead $scc ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.10, implicit $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.9: ; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.17(0x40000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr90_sgpr91, $sgpr92_sgpr93, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83 = SI_SPILL_S1024_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s1024) from %stack.1, align 4, addrspace 5) - ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY killed renamable $sgpr68_sgpr69, implicit $exec + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY renamable $sgpr60_sgpr61, implicit $exec ; CHECK-NEXT: GLOBAL_STORE_DWORDX2_SADDR undef %18:vgpr_32, [[COPY2]], undef renamable $sgpr4_sgpr5, 0, 0, implicit $exec :: (store (s64), addrspace 1) - ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr12_sgpr13, implicit $exec - ; CHECK-NEXT: dead renamable $sgpr4_sgpr5 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_1]], implicit $exec - ; CHECK-NEXT: renamable $sgpr64 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc + ; CHECK-NEXT: [[V_CNDMASK_B32_e64_1:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, $sgpr34_sgpr35, implicit $exec + ; CHECK-NEXT: dead renamable $sgpr12_sgpr13 = V_CMP_NE_U32_e64 1, [[V_CNDMASK_B32_e64_1]], implicit $exec + ; CHECK-NEXT: renamable $sgpr58 = S_ADD_U32 renamable $sgpr8, 32, implicit-def dead $scc ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr34_sgpr35 - ; CHECK-NEXT: renamable $sgpr42_sgpr43 = COPY killed renamable $sgpr6_sgpr7 - ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr42_sgpr43 - ; CHECK-NEXT: renamable $sgpr38_sgpr39 = COPY killed renamable $sgpr10_sgpr11 - ; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr38_sgpr39 - ; CHECK-NEXT: renamable $sgpr44_sgpr45 = COPY killed renamable $sgpr12_sgpr13 - ; CHECK-NEXT: $sgpr12 = COPY renamable $sgpr33 + ; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY killed renamable $sgpr4_sgpr5 + ; CHECK-NEXT: $sgpr4_sgpr5 = COPY renamable $sgpr52_sgpr53 + ; CHECK-NEXT: renamable $sgpr54_sgpr55 = COPY killed renamable $sgpr6_sgpr7 + ; CHECK-NEXT: $sgpr6_sgpr7 = COPY renamable $sgpr54_sgpr55 + ; CHECK-NEXT: renamable $sgpr56_sgpr57 = COPY killed renamable $sgpr10_sgpr11 + ; CHECK-NEXT: $sgpr10_sgpr11 = COPY renamable $sgpr56_sgpr57 + ; CHECK-NEXT: $sgpr12 = COPY renamable $sgpr14 ; CHECK-NEXT: $sgpr13 = COPY renamable $sgpr15 - ; CHECK-NEXT: renamable $sgpr36 = COPY killed renamable $sgpr16 - ; CHECK-NEXT: renamable $sgpr37 = COPY killed renamable $sgpr15 - ; CHECK-NEXT: renamable $sgpr40 = COPY killed renamable $sgpr8 - ; CHECK-NEXT: renamable $sgpr46_sgpr47 = COPY killed renamable $sgpr18_sgpr19 - ; CHECK-NEXT: renamable $sgpr48_sgpr49 = COPY killed renamable $sgpr20_sgpr21 - ; CHECK-NEXT: renamable $sgpr50_sgpr51 = COPY killed renamable $sgpr22_sgpr23 - ; CHECK-NEXT: renamable $sgpr52_sgpr53 = COPY killed renamable $sgpr24_sgpr25 - ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13 + ; CHECK-NEXT: renamable $sgpr62 = COPY killed renamable $sgpr8 + ; CHECK-NEXT: renamable $sgpr33 = COPY killed renamable $sgpr16 + ; CHECK-NEXT: renamable $sgpr59 = COPY killed renamable $sgpr15 + ; CHECK-NEXT: renamable $sgpr63 = COPY killed renamable $sgpr14 + ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr4_sgpr5, implicit $sgpr6_sgpr7, implicit $sgpr10_sgpr11, implicit $sgpr12, implicit $sgpr13 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr64_sgpr65 - ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr4_sgpr5, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9 - ; CHECK-NEXT: renamable $sgpr24_sgpr25 = COPY killed renamable $sgpr52_sgpr53 - ; CHECK-NEXT: renamable $sgpr22_sgpr23 = COPY killed renamable $sgpr50_sgpr51 - ; CHECK-NEXT: renamable $sgpr20_sgpr21 = COPY killed renamable $sgpr48_sgpr49 - ; CHECK-NEXT: renamable $sgpr18_sgpr19 = COPY killed renamable $sgpr46_sgpr47 - ; CHECK-NEXT: renamable $sgpr12_sgpr13 = COPY killed renamable $sgpr44_sgpr45 - ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr42_sgpr43 - ; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr40 - ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr38_sgpr39 - ; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr37 - ; CHECK-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr36 + ; CHECK-NEXT: $sgpr8_sgpr9 = COPY renamable $sgpr58_sgpr59 + ; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL undef renamable $sgpr12_sgpr13, 0, csr_amdgpu_gfx90ainsts, implicit $sgpr8_sgpr9 + ; CHECK-NEXT: renamable $sgpr14 = COPY killed renamable $sgpr63 + ; CHECK-NEXT: renamable $sgpr15 = COPY killed renamable $sgpr59 + ; CHECK-NEXT: renamable $sgpr16 = COPY killed renamable $sgpr33 + ; CHECK-NEXT: renamable $sgpr4_sgpr5 = COPY killed renamable $sgpr52_sgpr53 + ; CHECK-NEXT: renamable $sgpr6_sgpr7 = COPY killed renamable $sgpr54_sgpr55 + ; CHECK-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr62 + ; CHECK-NEXT: renamable $sgpr10_sgpr11 = COPY killed renamable $sgpr56_sgpr57 ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 - ; CHECK-NEXT: $exec = S_MOV_B64_term renamable $sgpr92_sgpr93 + ; CHECK-NEXT: $exec = S_MOV_B64_term renamable $sgpr50_sgpr51 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.10, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.17 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.10: ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.12(0x40000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr90_sgpr91, $sgpr92_sgpr93, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr48_sgpr49, $sgpr50_sgpr51, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.8, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.12 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.11: ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.17(0x40000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec ; CHECK-NEXT: S_BRANCH %bb.17 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.12: ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.13(0x40000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr88_sgpr89, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr46_sgpr47, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr88_sgpr89 + ; CHECK-NEXT: $exec = S_MOV_B64_term killed renamable $sgpr46_sgpr47 ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.11, implicit $exec ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.13: ; CHECK-NEXT: successors: %bb.15(0x40000000), %bb.14(0x40000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr24_sgpr25, implicit-def dead $scc + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr42_sgpr43, implicit-def dead $scc ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.15, implicit $vcc ; CHECK-NEXT: S_BRANCH %bb.14 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.14: ; CHECK-NEXT: successors: %bb.15(0x80000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.15: ; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.16(0x40000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr12_sgpr13, $sgpr18_sgpr19, $sgpr20_sgpr21, $sgpr22_sgpr23, $sgpr24_sgpr25, $sgpr34_sgpr35, $sgpr100_sgpr101 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16, $sgpr4_sgpr5, $sgpr6_sgpr7, $sgpr8_sgpr9:0x0000000000000003, $sgpr10_sgpr11, $sgpr34_sgpr35, $sgpr36_sgpr37, $sgpr38_sgpr39, $sgpr40_sgpr41, $sgpr42_sgpr43, $sgpr44_sgpr45, $sgpr44_sgpr45_sgpr46_sgpr47_sgpr48_sgpr49_sgpr50_sgpr51_sgpr52_sgpr53_sgpr54_sgpr55_sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75:0x0000000F00000000 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr18_sgpr19, implicit-def dead $scc + ; CHECK-NEXT: $vcc = S_AND_B64 $exec, renamable $sgpr36_sgpr37, implicit-def dead $scc ; CHECK-NEXT: S_CBRANCH_VCCNZ %bb.11, implicit $vcc ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.16: ; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.17(0x40000000) - ; CHECK-NEXT: liveins: $sgpr15, $sgpr16, $sgpr33 + ; CHECK-NEXT: liveins: $sgpr14, $sgpr15, $sgpr16 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.3, implicit $exec ; CHECK-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll --- a/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll +++ b/llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll @@ -30,53 +30,53 @@ ; GCN-NEXT: v_writelane_b32 v40, s45, 13 ; GCN-NEXT: v_writelane_b32 v40, s46, 14 ; GCN-NEXT: v_writelane_b32 v40, s47, 15 -; GCN-NEXT: v_mov_b32_e32 v41, v31 -; GCN-NEXT: s_mov_b32 s42, s15 -; GCN-NEXT: s_mov_b32 s43, s14 -; GCN-NEXT: s_mov_b32 s44, s13 -; GCN-NEXT: s_mov_b32 s45, s12 -; GCN-NEXT: s_mov_b64 s[34:35], s[10:11] -; GCN-NEXT: s_mov_b64 s[36:37], s[8:9] -; GCN-NEXT: s_mov_b64 s[38:39], s[6:7] -; GCN-NEXT: s_mov_b64 s[40:41], s[4:5] ; GCN-NEXT: v_mov_b32_e32 v0, 0 ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_load_dword v0, v[0:1] -; GCN-NEXT: s_mov_b64 s[4:5], 0 -; GCN-NEXT: s_mov_b64 s[8:9], -1 +; GCN-NEXT: s_mov_b64 s[16:17], 0 +; GCN-NEXT: s_mov_b64 s[20:21], -1 ; GCN-NEXT: s_waitcnt vmcnt(0) ; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0 ; GCN-NEXT: s_mov_b64 s[46:47], 0 -; GCN-NEXT: s_mov_b64 s[6:7], 0 +; GCN-NEXT: s_mov_b64 s[18:19], 0 ; GCN-NEXT: s_cbranch_vccz .LBB0_9 ; GCN-NEXT: ; %bb.1: ; %Flow -; GCN-NEXT: s_andn2_b64 vcc, exec, s[8:9] +; GCN-NEXT: s_andn2_b64 vcc, exec, s[20:21] ; GCN-NEXT: s_cbranch_vccz .LBB0_10 ; GCN-NEXT: .LBB0_2: ; %Flow1 -; GCN-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GCN-NEXT: s_andn2_b64 vcc, exec, s[18:19] ; GCN-NEXT: s_cbranch_vccnz .LBB0_4 ; GCN-NEXT: .LBB0_3: ; %bb9 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] -; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] -; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 -; GCN-NEXT: v_mov_b32_e32 v31, v41 +; GCN-NEXT: s_mov_b64 s[34:35], s[4:5] +; GCN-NEXT: s_mov_b64 s[36:37], s[6:7] +; GCN-NEXT: s_mov_b64 s[38:39], s[8:9] +; GCN-NEXT: s_mov_b64 s[40:41], s[10:11] +; GCN-NEXT: s_mov_b32 s42, s12 +; GCN-NEXT: s_mov_b32 s43, s13 +; GCN-NEXT: s_mov_b32 s44, s14 +; GCN-NEXT: s_mov_b32 s45, s15 +; GCN-NEXT: v_mov_b32_e32 v41, v31 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] +; GCN-NEXT: v_mov_b32_e32 v31, v41 +; GCN-NEXT: s_mov_b32 s12, s42 +; GCN-NEXT: s_mov_b32 s13, s43 +; GCN-NEXT: s_mov_b32 s14, s44 +; GCN-NEXT: s_mov_b32 s15, s45 +; GCN-NEXT: s_mov_b64 s[4:5], s[34:35] +; GCN-NEXT: s_mov_b64 s[6:7], s[36:37] +; GCN-NEXT: s_mov_b64 s[8:9], s[38:39] +; GCN-NEXT: s_mov_b64 s[10:11], s[40:41] ; GCN-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 -; GCN-NEXT: s_mov_b64 s[4:5], 0 -; GCN-NEXT: s_andn2_b64 s[6:7], s[46:47], exec -; GCN-NEXT: s_and_b64 s[8:9], vcc, exec -; GCN-NEXT: s_or_b64 s[46:47], s[6:7], s[8:9] +; GCN-NEXT: s_mov_b64 s[16:17], 0 +; GCN-NEXT: s_andn2_b64 s[18:19], s[46:47], exec +; GCN-NEXT: s_and_b64 s[20:21], vcc, exec +; GCN-NEXT: s_or_b64 s[46:47], s[18:19], s[20:21] ; GCN-NEXT: .LBB0_4: ; %Flow2 -; GCN-NEXT: s_and_saveexec_b64 s[6:7], s[46:47] -; GCN-NEXT: s_xor_b64 s[6:7], exec, s[6:7] +; GCN-NEXT: s_and_saveexec_b64 s[18:19], s[46:47] +; GCN-NEXT: s_xor_b64 s[18:19], exec, s[18:19] ; GCN-NEXT: s_cbranch_execz .LBB0_6 ; GCN-NEXT: ; %bb.5: ; %bb12 ; GCN-NEXT: v_mov_b32_e32 v2, 0 @@ -84,22 +84,13 @@ ; GCN-NEXT: v_mov_b32_e32 v1, 0 ; GCN-NEXT: flat_store_dword v[0:1], v2 ; GCN-NEXT: .LBB0_6: ; %Flow3 -; GCN-NEXT: s_or_b64 exec, exec, s[6:7] -; GCN-NEXT: s_andn2_b64 vcc, exec, s[4:5] +; GCN-NEXT: s_or_b64 exec, exec, s[18:19] +; GCN-NEXT: s_andn2_b64 vcc, exec, s[16:17] ; GCN-NEXT: s_cbranch_vccnz .LBB0_8 ; GCN-NEXT: ; %bb.7: ; %bb7 ; GCN-NEXT: s_getpc_b64 s[16:17] ; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4 ; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12 -; GCN-NEXT: s_mov_b64 s[4:5], s[40:41] -; GCN-NEXT: s_mov_b64 s[6:7], s[38:39] -; GCN-NEXT: s_mov_b64 s[8:9], s[36:37] -; GCN-NEXT: s_mov_b64 s[10:11], s[34:35] -; GCN-NEXT: s_mov_b32 s12, s45 -; GCN-NEXT: s_mov_b32 s13, s44 -; GCN-NEXT: s_mov_b32 s14, s43 -; GCN-NEXT: s_mov_b32 s15, s42 -; GCN-NEXT: v_mov_b32_e32 v31, v41 ; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17] ; GCN-NEXT: .LBB0_8: ; %UnifiedReturnBlock ; GCN-NEXT: v_readlane_b32 s47, v40, 15 @@ -129,13 +120,13 @@ ; GCN-NEXT: s_setpc_b64 s[30:31] ; GCN-NEXT: .LBB0_9: ; %bb2 ; GCN-NEXT: v_cmp_eq_u32_e64 s[46:47], 21, v0 -; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 21, v0 +; GCN-NEXT: v_cmp_ne_u32_e64 s[18:19], 21, v0 ; GCN-NEXT: s_mov_b64 vcc, exec ; GCN-NEXT: s_cbranch_execnz .LBB0_2 ; GCN-NEXT: .LBB0_10: ; %bb4 -; GCN-NEXT: s_mov_b64 s[4:5], -1 -; GCN-NEXT: v_cmp_ne_u32_e64 s[6:7], 9, v0 -; GCN-NEXT: s_andn2_b64 vcc, exec, s[6:7] +; GCN-NEXT: s_mov_b64 s[16:17], -1 +; GCN-NEXT: v_cmp_ne_u32_e64 s[18:19], 9, v0 +; GCN-NEXT: s_andn2_b64 vcc, exec, s[18:19] ; GCN-NEXT: s_cbranch_vccz .LBB0_3 ; GCN-NEXT: s_branch .LBB0_4 ; SI-OPT-LABEL: @widget( diff --git a/llvm/test/CodeGen/ARM/csr-split.ll b/llvm/test/CodeGen/ARM/csr-split.ll --- a/llvm/test/CodeGen/ARM/csr-split.ll +++ b/llvm/test/CodeGen/ARM/csr-split.ll @@ -8,14 +8,13 @@ define dso_local signext i32 @test1(ptr %b) local_unnamed_addr { ; CHECK-LABEL: test1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: ldr r0, .LCPI0_0 -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: cmp r0, r4 -; CHECK-NEXT: popne {r4, lr} +; CHECK-NEXT: ldr r1, .LCPI0_0 +; CHECK-NEXT: ldr r1, [r1] +; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: movne pc, lr ; CHECK-NEXT: .LBB0_1: @ %if.then +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: pop {r4, lr} @@ -48,20 +47,19 @@ define dso_local signext i32 @test2(ptr %p1) local_unnamed_addr { ; CHECK-LABEL: test2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r0, #0 ; CHECK-NEXT: beq .LBB1_2 ; CHECK-NEXT: @ %bb.1: @ %if.end -; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: ldr r0, .LCPI1_0 -; CHECK-NEXT: ldr r0, [r0] -; CHECK-NEXT: cmp r0, r4 +; CHECK-NEXT: ldr r1, .LCPI1_0 +; CHECK-NEXT: ldr r1, [r1] +; CHECK-NEXT: cmp r1, r0 ; CHECK-NEXT: beq .LBB1_3 ; CHECK-NEXT: .LBB1_2: @ %return ; CHECK-NEXT: mov r0, #0 -; CHECK-NEXT: pop {r4, lr} ; CHECK-NEXT: mov pc, lr ; CHECK-NEXT: .LBB1_3: @ %if.then2 +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: pop {r4, lr} diff --git a/llvm/test/CodeGen/ARM/divmod-eabi.ll b/llvm/test/CodeGen/ARM/divmod-eabi.ll --- a/llvm/test/CodeGen/ARM/divmod-eabi.ll +++ b/llvm/test/CodeGen/ARM/divmod-eabi.ll @@ -218,6 +218,7 @@ ; DARWIN: mov [[sum:r[0-9]+]], r0 ; DARWIN-O0: __modsi3 ; WINDOWS: __rt_sdiv +; WINDOWS: mov [[arg:r[0-9]+]], r1 ; WINDOWS: mov [[rem:r[0-9]+]], r1 %rem1 = srem i32 %b, %rem ; EABI: __aeabi_idivmod diff --git a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll --- a/llvm/test/CodeGen/ARM/thumb2-size-opt.ll +++ b/llvm/test/CodeGen/ARM/thumb2-size-opt.ll @@ -85,8 +85,8 @@ define i32 @bundled_instruction(ptr %addr, ptr %addr2, i1 %tst) minsize { ; CHECK-LABEL: bundled_instruction: -; CHECK: itee ne -; CHECK: ldmeq r3!, {{{r[0-9]+}}} +; CHECK: iteee ne +; CHECK: ldmeq r2!, {{{r[0-9]+}}} br i1 %tst, label %true, label %false true: diff --git a/llvm/test/CodeGen/AVR/cttz.ll b/llvm/test/CodeGen/AVR/cttz.ll --- a/llvm/test/CodeGen/AVR/cttz.ll +++ b/llvm/test/CodeGen/AVR/cttz.ll @@ -28,10 +28,9 @@ ; CHECK: mov {{.*}}[[SCRATCH]], {{.*}}[[RESULT]] ; CHECK: swap {{.*}}[[SCRATCH]] ; CHECK: add {{.*}}[[SCRATCH]], {{.*}}[[RESULT]] -; CHECK: andi {{.*}}[[SCRATCH]], 15 ; CHECK: mov {{.*}}[[RESULT]], {{.*}}[[SCRATCH]] +; CHECK: andi {{.*}}[[RESULT]], 15 ; CHECK: ret ; CHECK: [[END_BB]]: -; CHECK: ldi {{.*}}[[SCRATCH]], 8 -; CHECK: mov {{.*}}[[RESULT]], {{.*}}[[SCRATCH]] +; CHECK: ldi {{.*}}[[RESULT]], 8 ; CHECK: ret diff --git a/llvm/test/CodeGen/Hexagon/noreturn-noepilog.ll b/llvm/test/CodeGen/Hexagon/noreturn-noepilog.ll --- a/llvm/test/CodeGen/Hexagon/noreturn-noepilog.ll +++ b/llvm/test/CodeGen/Hexagon/noreturn-noepilog.ll @@ -1,12 +1,10 @@ ; RUN: llc -march=hexagon < %s | FileCheck %s ; -; XFAIL: * -; This test is failing after post-ra machine sinking. ; ; Check that no epilogue is inserted after a noreturn call. ; ; CHECK-LABEL: f1: -; CHECK: allocframe(r29,#0):raw +; CHECK: allocframe ; CHECK-NOT: deallocframe target triple = "hexagon" diff --git a/llvm/test/CodeGen/PowerPC/csr-split.ll b/llvm/test/CodeGen/PowerPC/csr-split.ll --- a/llvm/test/CodeGen/PowerPC/csr-split.ll +++ b/llvm/test/CodeGen/PowerPC/csr-split.ll @@ -11,35 +11,47 @@ define dso_local signext i32 @test1(ptr %b) local_unnamed_addr { ; CHECK-PWR9-LABEL: test1: ; CHECK-PWR9: # %bb.0: # %entry +; CHECK-PWR9-NEXT: addis r4, r2, a@toc@ha +; CHECK-PWR9-NEXT: lwa r4, a@toc@l(r4) +; CHECK-PWR9-NEXT: cmpld r4, r3 +; CHECK-PWR9-NEXT: # implicit-def: $r4 +; CHECK-PWR9-NEXT: beq cr0, .LBB0_2 +; CHECK-PWR9-NEXT: # %bb.1: # %if.end +; CHECK-PWR9-NEXT: extsw r3, r4 +; CHECK-PWR9-NEXT: blr +; CHECK-PWR9-NEXT: .LBB0_2: # %if.then ; CHECK-PWR9-NEXT: mflr r0 ; CHECK-PWR9-NEXT: .cfi_def_cfa_offset 48 ; CHECK-PWR9-NEXT: .cfi_offset lr, 16 ; CHECK-PWR9-NEXT: .cfi_offset r30, -16 ; CHECK-PWR9-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-PWR9-NEXT: stdu r1, -48(r1) -; CHECK-PWR9-NEXT: mr r30, r3 -; CHECK-PWR9-NEXT: addis r3, r2, a@toc@ha ; CHECK-PWR9-NEXT: std r0, 64(r1) -; CHECK-PWR9-NEXT: lwa r3, a@toc@l(r3) -; CHECK-PWR9-NEXT: cmpld r3, r30 -; CHECK-PWR9-NEXT: # implicit-def: $r3 -; CHECK-PWR9-NEXT: bne cr0, .LBB0_2 -; CHECK-PWR9-NEXT: # %bb.1: # %if.then +; CHECK-PWR9-NEXT: mr r30, r3 ; CHECK-PWR9-NEXT: bl callVoid ; CHECK-PWR9-NEXT: nop ; CHECK-PWR9-NEXT: mr r3, r30 ; CHECK-PWR9-NEXT: bl callNonVoid ; CHECK-PWR9-NEXT: nop -; CHECK-PWR9-NEXT: .LBB0_2: # %if.end -; CHECK-PWR9-NEXT: extsw r3, r3 +; CHECK-PWR9-NEXT: mr r4, r3 ; CHECK-PWR9-NEXT: addi r1, r1, 48 ; CHECK-PWR9-NEXT: ld r0, 16(r1) ; CHECK-PWR9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-PWR9-NEXT: mtlr r0 +; CHECK-PWR9-NEXT: extsw r3, r4 ; CHECK-PWR9-NEXT: blr ; ; CHECK-LABEL: test1: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addis r4, r2, a@toc@ha +; CHECK-NEXT: lwa r4, a@toc@l(r4) +; CHECK-NEXT: cmpld r4, r3 +; CHECK-NEXT: # implicit-def: $r4 +; CHECK-NEXT: beq cr0, .LBB0_2 +; CHECK-NEXT: # %bb.1: # %if.end +; CHECK-NEXT: extsw r3, r4 +; CHECK-NEXT: blr +; CHECK-NEXT: .LBB0_2: # %if.then ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: stdu r1, -128(r1) ; CHECK-NEXT: std r0, 144(r1) @@ -48,23 +60,17 @@ ; CHECK-NEXT: .cfi_offset r30, -16 ; CHECK-NEXT: std r30, 112(r1) # 8-byte Folded Spill ; CHECK-NEXT: mr r30, r3 -; CHECK-NEXT: addis r3, r2, a@toc@ha -; CHECK-NEXT: lwa r3, a@toc@l(r3) -; CHECK-NEXT: cmpld r3, r30 -; CHECK-NEXT: # implicit-def: $r3 -; CHECK-NEXT: bne cr0, .LBB0_2 -; CHECK-NEXT: # %bb.1: # %if.then ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: nop ; CHECK-NEXT: mr r3, r30 ; CHECK-NEXT: bl callNonVoid ; CHECK-NEXT: nop -; CHECK-NEXT: .LBB0_2: # %if.end ; CHECK-NEXT: ld r30, 112(r1) # 8-byte Folded Reload -; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: mr r4, r3 ; CHECK-NEXT: addi r1, r1, 128 ; CHECK-NEXT: ld r0, 16(r1) ; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: extsw r3, r4 ; CHECK-NEXT: blr entry: %0 = load i32, ptr @a, align 4, !tbaa !2 diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection-aix.ll @@ -1927,11 +1927,12 @@ ; BE-P10-NEXT: std r0, 16(r1) ; BE-P10-NEXT: hashst r0, -16(r1) ; BE-P10-NEXT: stdu r1, -144(r1) +; BE-P10-NEXT: lwz r4, 12(r3) ; BE-P10-NEXT: std r31, 136(r1) # 8-byte Folded Spill ; BE-P10-NEXT: mr r31, r3 -; BE-P10-NEXT: lwz r3, 12(r3) -; BE-P10-NEXT: stw r3, 124(r1) -; BE-P10-NEXT: addi r3, r1, 124 +; BE-P10-NEXT: stw r4, 124(r1) +; BE-P10-NEXT: addi r4, r1, 124 +; BE-P10-NEXT: mr r3, r4 ; BE-P10-NEXT: bl .callee2[PR] ; BE-P10-NEXT: nop ; BE-P10-NEXT: lwz r4, 16(r31) @@ -1958,9 +1959,10 @@ ; BE-P9-NEXT: hashst r0, -16(r1) ; BE-P9-NEXT: std r31, 136(r1) # 8-byte Folded Spill ; BE-P9-NEXT: mr r31, r3 -; BE-P9-NEXT: lwz r3, 12(r3) -; BE-P9-NEXT: stw r3, 124(r1) -; BE-P9-NEXT: addi r3, r1, 124 +; BE-P9-NEXT: lwz r4, 12(r3) +; BE-P9-NEXT: stw r4, 124(r1) +; BE-P9-NEXT: addi r4, r1, 124 +; BE-P9-NEXT: mr r3, r4 ; BE-P9-NEXT: bl .callee2[PR] ; BE-P9-NEXT: nop ; BE-P9-NEXT: lwz r4, 16(r31) @@ -1987,9 +1989,10 @@ ; BE-P8-NEXT: hashst r0, -16(r1) ; BE-P8-NEXT: std r31, 136(r1) # 8-byte Folded Spill ; BE-P8-NEXT: mr r31, r3 -; BE-P8-NEXT: lwz r3, 12(r3) -; BE-P8-NEXT: stw r3, 124(r1) -; BE-P8-NEXT: addi r3, r1, 124 +; BE-P8-NEXT: lwz r4, 12(r3) +; BE-P8-NEXT: stw r4, 124(r1) +; BE-P8-NEXT: addi r4, r1, 124 +; BE-P8-NEXT: mr r3, r4 ; BE-P8-NEXT: bl .callee2[PR] ; BE-P8-NEXT: nop ; BE-P8-NEXT: lwz r4, 16(r31) @@ -2014,11 +2017,12 @@ ; BE-32BIT-P10-NEXT: stw r0, 8(r1) ; BE-32BIT-P10-NEXT: hashst r0, -16(r1) ; BE-32BIT-P10-NEXT: stwu r1, -80(r1) +; BE-32BIT-P10-NEXT: lwz r4, 12(r3) ; BE-32BIT-P10-NEXT: stw r31, 76(r1) # 4-byte Folded Spill ; BE-32BIT-P10-NEXT: mr r31, r3 -; BE-32BIT-P10-NEXT: lwz r3, 12(r3) -; BE-32BIT-P10-NEXT: stw r3, 60(r1) -; BE-32BIT-P10-NEXT: addi r3, r1, 60 +; BE-32BIT-P10-NEXT: stw r4, 60(r1) +; BE-32BIT-P10-NEXT: addi r4, r1, 60 +; BE-32BIT-P10-NEXT: mr r3, r4 ; BE-32BIT-P10-NEXT: bl .callee2[PR] ; BE-32BIT-P10-NEXT: nop ; BE-32BIT-P10-NEXT: lwz r4, 16(r31) @@ -2044,9 +2048,10 @@ ; BE-32BIT-P9-NEXT: hashst r0, -16(r1) ; BE-32BIT-P9-NEXT: stw r31, 76(r1) # 4-byte Folded Spill ; BE-32BIT-P9-NEXT: mr r31, r3 -; BE-32BIT-P9-NEXT: lwz r3, 12(r3) -; BE-32BIT-P9-NEXT: stw r3, 60(r1) -; BE-32BIT-P9-NEXT: addi r3, r1, 60 +; BE-32BIT-P9-NEXT: lwz r4, 12(r3) +; BE-32BIT-P9-NEXT: stw r4, 60(r1) +; BE-32BIT-P9-NEXT: addi r4, r1, 60 +; BE-32BIT-P9-NEXT: mr r3, r4 ; BE-32BIT-P9-NEXT: bl .callee2[PR] ; BE-32BIT-P9-NEXT: nop ; BE-32BIT-P9-NEXT: lwz r4, 16(r31) @@ -2072,9 +2077,10 @@ ; BE-32BIT-P8-NEXT: hashst r0, -16(r1) ; BE-32BIT-P8-NEXT: stw r31, 76(r1) # 4-byte Folded Spill ; BE-32BIT-P8-NEXT: mr r31, r3 -; BE-32BIT-P8-NEXT: lwz r3, 12(r3) -; BE-32BIT-P8-NEXT: stw r3, 60(r1) -; BE-32BIT-P8-NEXT: addi r3, r1, 60 +; BE-32BIT-P8-NEXT: lwz r4, 12(r3) +; BE-32BIT-P8-NEXT: stw r4, 60(r1) +; BE-32BIT-P8-NEXT: addi r4, r1, 60 +; BE-32BIT-P8-NEXT: mr r3, r4 ; BE-32BIT-P8-NEXT: bl .callee2[PR] ; BE-32BIT-P8-NEXT: nop ; BE-32BIT-P8-NEXT: lwz r4, 16(r31) @@ -2098,11 +2104,12 @@ ; BE-P10-PRIV-NEXT: std r0, 16(r1) ; BE-P10-PRIV-NEXT: hashstp r0, -16(r1) ; BE-P10-PRIV-NEXT: stdu r1, -144(r1) +; BE-P10-PRIV-NEXT: lwz r4, 12(r3) ; BE-P10-PRIV-NEXT: std r31, 136(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: mr r31, r3 -; BE-P10-PRIV-NEXT: lwz r3, 12(r3) -; BE-P10-PRIV-NEXT: stw r3, 124(r1) -; BE-P10-PRIV-NEXT: addi r3, r1, 124 +; BE-P10-PRIV-NEXT: stw r4, 124(r1) +; BE-P10-PRIV-NEXT: addi r4, r1, 124 +; BE-P10-PRIV-NEXT: mr r3, r4 ; BE-P10-PRIV-NEXT: bl .callee2[PR] ; BE-P10-PRIV-NEXT: nop ; BE-P10-PRIV-NEXT: lwz r4, 16(r31) @@ -2129,9 +2136,10 @@ ; BE-P9-PRIV-NEXT: hashstp r0, -16(r1) ; BE-P9-PRIV-NEXT: std r31, 136(r1) # 8-byte Folded Spill ; BE-P9-PRIV-NEXT: mr r31, r3 -; BE-P9-PRIV-NEXT: lwz r3, 12(r3) -; BE-P9-PRIV-NEXT: stw r3, 124(r1) -; BE-P9-PRIV-NEXT: addi r3, r1, 124 +; BE-P9-PRIV-NEXT: lwz r4, 12(r3) +; BE-P9-PRIV-NEXT: stw r4, 124(r1) +; BE-P9-PRIV-NEXT: addi r4, r1, 124 +; BE-P9-PRIV-NEXT: mr r3, r4 ; BE-P9-PRIV-NEXT: bl .callee2[PR] ; BE-P9-PRIV-NEXT: nop ; BE-P9-PRIV-NEXT: lwz r4, 16(r31) @@ -2158,9 +2166,10 @@ ; BE-P8-PRIV-NEXT: hashstp r0, -16(r1) ; BE-P8-PRIV-NEXT: std r31, 136(r1) # 8-byte Folded Spill ; BE-P8-PRIV-NEXT: mr r31, r3 -; BE-P8-PRIV-NEXT: lwz r3, 12(r3) -; BE-P8-PRIV-NEXT: stw r3, 124(r1) -; BE-P8-PRIV-NEXT: addi r3, r1, 124 +; BE-P8-PRIV-NEXT: lwz r4, 12(r3) +; BE-P8-PRIV-NEXT: stw r4, 124(r1) +; BE-P8-PRIV-NEXT: addi r4, r1, 124 +; BE-P8-PRIV-NEXT: mr r3, r4 ; BE-P8-PRIV-NEXT: bl .callee2[PR] ; BE-P8-PRIV-NEXT: nop ; BE-P8-PRIV-NEXT: lwz r4, 16(r31) @@ -2185,11 +2194,12 @@ ; BE-32BIT-P10-PRIV-NEXT: stw r0, 8(r1) ; BE-32BIT-P10-PRIV-NEXT: hashstp r0, -16(r1) ; BE-32BIT-P10-PRIV-NEXT: stwu r1, -80(r1) +; BE-32BIT-P10-PRIV-NEXT: lwz r4, 12(r3) ; BE-32BIT-P10-PRIV-NEXT: stw r31, 76(r1) # 4-byte Folded Spill ; BE-32BIT-P10-PRIV-NEXT: mr r31, r3 -; BE-32BIT-P10-PRIV-NEXT: lwz r3, 12(r3) -; BE-32BIT-P10-PRIV-NEXT: stw r3, 60(r1) -; BE-32BIT-P10-PRIV-NEXT: addi r3, r1, 60 +; BE-32BIT-P10-PRIV-NEXT: stw r4, 60(r1) +; BE-32BIT-P10-PRIV-NEXT: addi r4, r1, 60 +; BE-32BIT-P10-PRIV-NEXT: mr r3, r4 ; BE-32BIT-P10-PRIV-NEXT: bl .callee2[PR] ; BE-32BIT-P10-PRIV-NEXT: nop ; BE-32BIT-P10-PRIV-NEXT: lwz r4, 16(r31) @@ -2215,9 +2225,10 @@ ; BE-32BIT-P9-PRIV-NEXT: hashstp r0, -16(r1) ; BE-32BIT-P9-PRIV-NEXT: stw r31, 76(r1) # 4-byte Folded Spill ; BE-32BIT-P9-PRIV-NEXT: mr r31, r3 -; BE-32BIT-P9-PRIV-NEXT: lwz r3, 12(r3) -; BE-32BIT-P9-PRIV-NEXT: stw r3, 60(r1) -; BE-32BIT-P9-PRIV-NEXT: addi r3, r1, 60 +; BE-32BIT-P9-PRIV-NEXT: lwz r4, 12(r3) +; BE-32BIT-P9-PRIV-NEXT: stw r4, 60(r1) +; BE-32BIT-P9-PRIV-NEXT: addi r4, r1, 60 +; BE-32BIT-P9-PRIV-NEXT: mr r3, r4 ; BE-32BIT-P9-PRIV-NEXT: bl .callee2[PR] ; BE-32BIT-P9-PRIV-NEXT: nop ; BE-32BIT-P9-PRIV-NEXT: lwz r4, 16(r31) @@ -2243,9 +2254,10 @@ ; BE-32BIT-P8-PRIV-NEXT: hashstp r0, -16(r1) ; BE-32BIT-P8-PRIV-NEXT: stw r31, 76(r1) # 4-byte Folded Spill ; BE-32BIT-P8-PRIV-NEXT: mr r31, r3 -; BE-32BIT-P8-PRIV-NEXT: lwz r3, 12(r3) -; BE-32BIT-P8-PRIV-NEXT: stw r3, 60(r1) -; BE-32BIT-P8-PRIV-NEXT: addi r3, r1, 60 +; BE-32BIT-P8-PRIV-NEXT: lwz r4, 12(r3) +; BE-32BIT-P8-PRIV-NEXT: stw r4, 60(r1) +; BE-32BIT-P8-PRIV-NEXT: addi r4, r1, 60 +; BE-32BIT-P8-PRIV-NEXT: mr r3, r4 ; BE-32BIT-P8-PRIV-NEXT: bl .callee2[PR] ; BE-32BIT-P8-PRIV-NEXT: nop ; BE-32BIT-P8-PRIV-NEXT: lwz r4, 16(r31) diff --git a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-rop-protection.ll @@ -2864,10 +2864,11 @@ ; LE-P10-NEXT: std r0, 16(r1) ; LE-P10-NEXT: hashst r0, -24(r1) ; LE-P10-NEXT: stdu r1, -64(r1) +; LE-P10-NEXT: lwz r4, 12(r3) ; LE-P10-NEXT: mr r30, r3 -; LE-P10-NEXT: lwz r3, 12(r3) -; LE-P10-NEXT: stw r3, 36(r1) -; LE-P10-NEXT: addi r3, r1, 36 +; LE-P10-NEXT: stw r4, 36(r1) +; LE-P10-NEXT: addi r4, r1, 36 +; LE-P10-NEXT: mr r3, r4 ; LE-P10-NEXT: bl callee2@notoc ; LE-P10-NEXT: lwz r4, 16(r30) ; LE-P10-NEXT: add r3, r4, r3 @@ -2893,9 +2894,10 @@ ; LE-P9-NEXT: std r0, 80(r1) ; LE-P9-NEXT: hashst r0, -24(r1) ; LE-P9-NEXT: mr r30, r3 -; LE-P9-NEXT: lwz r3, 12(r3) -; LE-P9-NEXT: stw r3, 36(r1) -; LE-P9-NEXT: addi r3, r1, 36 +; LE-P9-NEXT: lwz r4, 12(r3) +; LE-P9-NEXT: stw r4, 36(r1) +; LE-P9-NEXT: addi r4, r1, 36 +; LE-P9-NEXT: mr r3, r4 ; LE-P9-NEXT: bl callee2 ; LE-P9-NEXT: nop ; LE-P9-NEXT: lwz r4, 16(r30) @@ -2922,9 +2924,10 @@ ; LE-P8-NEXT: std r0, 80(r1) ; LE-P8-NEXT: hashst r0, -24(r1) ; LE-P8-NEXT: mr r30, r3 -; LE-P8-NEXT: lwz r3, 12(r3) -; LE-P8-NEXT: stw r3, 36(r1) -; LE-P8-NEXT: addi r3, r1, 36 +; LE-P8-NEXT: lwz r4, 12(r3) +; LE-P8-NEXT: stw r4, 36(r1) +; LE-P8-NEXT: addi r4, r1, 36 +; LE-P8-NEXT: mr r3, r4 ; LE-P8-NEXT: bl callee2 ; LE-P8-NEXT: nop ; LE-P8-NEXT: lwz r4, 16(r30) @@ -3045,11 +3048,12 @@ ; BE-P10-NEXT: std r0, 16(r1) ; BE-P10-NEXT: hashst r0, -24(r1) ; BE-P10-NEXT: stdu r1, -144(r1) +; BE-P10-NEXT: lwz r4, 12(r3) ; BE-P10-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; BE-P10-NEXT: mr r30, r3 -; BE-P10-NEXT: lwz r3, 12(r3) -; BE-P10-NEXT: stw r3, 116(r1) -; BE-P10-NEXT: addi r3, r1, 116 +; BE-P10-NEXT: stw r4, 116(r1) +; BE-P10-NEXT: addi r4, r1, 116 +; BE-P10-NEXT: mr r3, r4 ; BE-P10-NEXT: bl callee2 ; BE-P10-NEXT: nop ; BE-P10-NEXT: lwz r4, 16(r30) @@ -3076,9 +3080,10 @@ ; BE-P9-NEXT: hashst r0, -24(r1) ; BE-P9-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; BE-P9-NEXT: mr r30, r3 -; BE-P9-NEXT: lwz r3, 12(r3) -; BE-P9-NEXT: stw r3, 116(r1) -; BE-P9-NEXT: addi r3, r1, 116 +; BE-P9-NEXT: lwz r4, 12(r3) +; BE-P9-NEXT: stw r4, 116(r1) +; BE-P9-NEXT: addi r4, r1, 116 +; BE-P9-NEXT: mr r3, r4 ; BE-P9-NEXT: bl callee2 ; BE-P9-NEXT: nop ; BE-P9-NEXT: lwz r4, 16(r30) @@ -3105,9 +3110,10 @@ ; BE-P8-NEXT: hashst r0, -24(r1) ; BE-P8-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; BE-P8-NEXT: mr r30, r3 -; BE-P8-NEXT: lwz r3, 12(r3) -; BE-P8-NEXT: stw r3, 116(r1) -; BE-P8-NEXT: addi r3, r1, 116 +; BE-P8-NEXT: lwz r4, 12(r3) +; BE-P8-NEXT: stw r4, 116(r1) +; BE-P8-NEXT: addi r4, r1, 116 +; BE-P8-NEXT: mr r3, r4 ; BE-P8-NEXT: bl callee2 ; BE-P8-NEXT: nop ; BE-P8-NEXT: lwz r4, 16(r30) @@ -3133,10 +3139,11 @@ ; BE-32BIT-P10-NEXT: stw r30, 24(r1) # 4-byte Folded Spill ; BE-32BIT-P10-NEXT: beq cr0, .LBB2_2 ; BE-32BIT-P10-NEXT: # %bb.1: # %if.end +; BE-32BIT-P10-NEXT: lwz r4, 12(r3) ; BE-32BIT-P10-NEXT: mr r30, r3 -; BE-32BIT-P10-NEXT: lwz r3, 12(r3) -; BE-32BIT-P10-NEXT: stw r3, 12(r1) -; BE-32BIT-P10-NEXT: addi r3, r1, 12 +; BE-32BIT-P10-NEXT: stw r4, 12(r1) +; BE-32BIT-P10-NEXT: addi r4, r1, 12 +; BE-32BIT-P10-NEXT: mr r3, r4 ; BE-32BIT-P10-NEXT: bl callee2 ; BE-32BIT-P10-NEXT: lwz r4, 16(r30) ; BE-32BIT-P10-NEXT: add r3, r4, r3 @@ -3161,10 +3168,11 @@ ; BE-32BIT-P9-NEXT: stw r30, 24(r1) # 4-byte Folded Spill ; BE-32BIT-P9-NEXT: beq cr0, .LBB2_2 ; BE-32BIT-P9-NEXT: # %bb.1: # %if.end +; BE-32BIT-P9-NEXT: lwz r4, 12(r3) ; BE-32BIT-P9-NEXT: mr r30, r3 -; BE-32BIT-P9-NEXT: lwz r3, 12(r3) -; BE-32BIT-P9-NEXT: stw r3, 12(r1) -; BE-32BIT-P9-NEXT: addi r3, r1, 12 +; BE-32BIT-P9-NEXT: stw r4, 12(r1) +; BE-32BIT-P9-NEXT: addi r4, r1, 12 +; BE-32BIT-P9-NEXT: mr r3, r4 ; BE-32BIT-P9-NEXT: bl callee2 ; BE-32BIT-P9-NEXT: lwz r4, 16(r30) ; BE-32BIT-P9-NEXT: add r3, r4, r3 @@ -3189,10 +3197,11 @@ ; BE-32BIT-P8-NEXT: stw r30, 24(r1) # 4-byte Folded Spill ; BE-32BIT-P8-NEXT: beq cr0, .LBB2_2 ; BE-32BIT-P8-NEXT: # %bb.1: # %if.end +; BE-32BIT-P8-NEXT: lwz r4, 12(r3) ; BE-32BIT-P8-NEXT: mr r30, r3 -; BE-32BIT-P8-NEXT: lwz r3, 12(r3) -; BE-32BIT-P8-NEXT: stw r3, 12(r1) -; BE-32BIT-P8-NEXT: addi r3, r1, 12 +; BE-32BIT-P8-NEXT: stw r4, 12(r1) +; BE-32BIT-P8-NEXT: addi r4, r1, 12 +; BE-32BIT-P8-NEXT: mr r3, r4 ; BE-32BIT-P8-NEXT: bl callee2 ; BE-32BIT-P8-NEXT: lwz r4, 16(r30) ; BE-32BIT-P8-NEXT: add r3, r4, r3 @@ -3217,10 +3226,11 @@ ; LE-P10-PRIV-NEXT: std r0, 16(r1) ; LE-P10-PRIV-NEXT: hashstp r0, -24(r1) ; LE-P10-PRIV-NEXT: stdu r1, -64(r1) +; LE-P10-PRIV-NEXT: lwz r4, 12(r3) ; LE-P10-PRIV-NEXT: mr r30, r3 -; LE-P10-PRIV-NEXT: lwz r3, 12(r3) -; LE-P10-PRIV-NEXT: stw r3, 36(r1) -; LE-P10-PRIV-NEXT: addi r3, r1, 36 +; LE-P10-PRIV-NEXT: stw r4, 36(r1) +; LE-P10-PRIV-NEXT: addi r4, r1, 36 +; LE-P10-PRIV-NEXT: mr r3, r4 ; LE-P10-PRIV-NEXT: bl callee2@notoc ; LE-P10-PRIV-NEXT: lwz r4, 16(r30) ; LE-P10-PRIV-NEXT: add r3, r4, r3 @@ -3246,9 +3256,10 @@ ; LE-P9-PRIV-NEXT: std r0, 80(r1) ; LE-P9-PRIV-NEXT: hashstp r0, -24(r1) ; LE-P9-PRIV-NEXT: mr r30, r3 -; LE-P9-PRIV-NEXT: lwz r3, 12(r3) -; LE-P9-PRIV-NEXT: stw r3, 36(r1) -; LE-P9-PRIV-NEXT: addi r3, r1, 36 +; LE-P9-PRIV-NEXT: lwz r4, 12(r3) +; LE-P9-PRIV-NEXT: stw r4, 36(r1) +; LE-P9-PRIV-NEXT: addi r4, r1, 36 +; LE-P9-PRIV-NEXT: mr r3, r4 ; LE-P9-PRIV-NEXT: bl callee2 ; LE-P9-PRIV-NEXT: nop ; LE-P9-PRIV-NEXT: lwz r4, 16(r30) @@ -3275,9 +3286,10 @@ ; LE-P8-PRIV-NEXT: std r0, 80(r1) ; LE-P8-PRIV-NEXT: hashstp r0, -24(r1) ; LE-P8-PRIV-NEXT: mr r30, r3 -; LE-P8-PRIV-NEXT: lwz r3, 12(r3) -; LE-P8-PRIV-NEXT: stw r3, 36(r1) -; LE-P8-PRIV-NEXT: addi r3, r1, 36 +; LE-P8-PRIV-NEXT: lwz r4, 12(r3) +; LE-P8-PRIV-NEXT: stw r4, 36(r1) +; LE-P8-PRIV-NEXT: addi r4, r1, 36 +; LE-P8-PRIV-NEXT: mr r3, r4 ; LE-P8-PRIV-NEXT: bl callee2 ; LE-P8-PRIV-NEXT: nop ; LE-P8-PRIV-NEXT: lwz r4, 16(r30) @@ -3302,11 +3314,12 @@ ; BE-P10-PRIV-NEXT: std r0, 16(r1) ; BE-P10-PRIV-NEXT: hashstp r0, -24(r1) ; BE-P10-PRIV-NEXT: stdu r1, -144(r1) +; BE-P10-PRIV-NEXT: lwz r4, 12(r3) ; BE-P10-PRIV-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; BE-P10-PRIV-NEXT: mr r30, r3 -; BE-P10-PRIV-NEXT: lwz r3, 12(r3) -; BE-P10-PRIV-NEXT: stw r3, 116(r1) -; BE-P10-PRIV-NEXT: addi r3, r1, 116 +; BE-P10-PRIV-NEXT: stw r4, 116(r1) +; BE-P10-PRIV-NEXT: addi r4, r1, 116 +; BE-P10-PRIV-NEXT: mr r3, r4 ; BE-P10-PRIV-NEXT: bl callee2 ; BE-P10-PRIV-NEXT: nop ; BE-P10-PRIV-NEXT: lwz r4, 16(r30) @@ -3333,9 +3346,10 @@ ; BE-P9-PRIV-NEXT: hashstp r0, -24(r1) ; BE-P9-PRIV-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; BE-P9-PRIV-NEXT: mr r30, r3 -; BE-P9-PRIV-NEXT: lwz r3, 12(r3) -; BE-P9-PRIV-NEXT: stw r3, 116(r1) -; BE-P9-PRIV-NEXT: addi r3, r1, 116 +; BE-P9-PRIV-NEXT: lwz r4, 12(r3) +; BE-P9-PRIV-NEXT: stw r4, 116(r1) +; BE-P9-PRIV-NEXT: addi r4, r1, 116 +; BE-P9-PRIV-NEXT: mr r3, r4 ; BE-P9-PRIV-NEXT: bl callee2 ; BE-P9-PRIV-NEXT: nop ; BE-P9-PRIV-NEXT: lwz r4, 16(r30) @@ -3362,9 +3376,10 @@ ; BE-P8-PRIV-NEXT: hashstp r0, -24(r1) ; BE-P8-PRIV-NEXT: std r30, 128(r1) # 8-byte Folded Spill ; BE-P8-PRIV-NEXT: mr r30, r3 -; BE-P8-PRIV-NEXT: lwz r3, 12(r3) -; BE-P8-PRIV-NEXT: stw r3, 116(r1) -; BE-P8-PRIV-NEXT: addi r3, r1, 116 +; BE-P8-PRIV-NEXT: lwz r4, 12(r3) +; BE-P8-PRIV-NEXT: stw r4, 116(r1) +; BE-P8-PRIV-NEXT: addi r4, r1, 116 +; BE-P8-PRIV-NEXT: mr r3, r4 ; BE-P8-PRIV-NEXT: bl callee2 ; BE-P8-PRIV-NEXT: nop ; BE-P8-PRIV-NEXT: lwz r4, 16(r30) diff --git a/llvm/test/CodeGen/PowerPC/subreg-postra.ll b/llvm/test/CodeGen/PowerPC/subreg-postra.ll --- a/llvm/test/CodeGen/PowerPC/subreg-postra.ll +++ b/llvm/test/CodeGen/PowerPC/subreg-postra.ll @@ -151,7 +151,7 @@ ; CHECK: stdcx. ; CHECK: iselgt {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} ; CHECK-NO-ISEL: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK-NO-ISEL: ori 29, 3, 0 +; CHECK-NO-ISEL: ori 5, 3, 0 ; CHECK-NO-ISEL: b [[SUCCESSOR:.LBB[0-9]+]] diff --git a/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll b/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll --- a/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll +++ b/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll @@ -165,8 +165,7 @@ ; CHECK-NEXT: .cfi_offset r30, -16 ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill ; CHECK-NEXT: stdu 1, -48(1) -; CHECK-NEXT: mr 30, 3 -; CHECK-NEXT: andi. 3, 3, 1 +; CHECK-NEXT: andi. 4, 3, 1 ; CHECK-NEXT: std 0, 64(1) ; CHECK-NEXT: bc 12, 1, .LBB2_3 ; CHECK-NEXT: .LBB2_1: # %v @@ -181,18 +180,20 @@ ; CHECK-NEXT: mtlr 0 ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB2_3: # %bb -; CHECK-NEXT: andi. 3, 30, 2 +; CHECK-NEXT: andi. 4, 3, 2 ; CHECK-NEXT: bne 0, .LBB2_5 ; CHECK-NEXT: # %bb.4: # %succ -; CHECK-NEXT: andi. 3, 30, 4 +; CHECK-NEXT: andi. 3, 3, 4 ; CHECK-NEXT: beq 0, .LBB2_2 ; CHECK-NEXT: b .LBB2_1 ; CHECK-NEXT: .LBB2_5: # %c +; CHECK-NEXT: mr 30, 3 ; CHECK-NEXT: bl c ; CHECK-NEXT: nop ; CHECK-NEXT: bl c ; CHECK-NEXT: nop -; CHECK-NEXT: andi. 3, 30, 4 +; CHECK-NEXT: mr 3, 30 +; CHECK-NEXT: andi. 3, 3, 4 ; CHECK-NEXT: beq 0, .LBB2_2 ; CHECK-NEXT: b .LBB2_1 entry: diff --git a/llvm/test/CodeGen/PowerPC/tls-cse.ll b/llvm/test/CodeGen/PowerPC/tls-cse.ll --- a/llvm/test/CodeGen/PowerPC/tls-cse.ll +++ b/llvm/test/CodeGen/PowerPC/tls-cse.ll @@ -44,6 +44,6 @@ ; CHECK: addis [[REG1:[0-9]+]], 2, _ZL20PrettyStackTraceHead@got@tlsld@ha ; CHECK: addi 3, [[REG1]], _ZL20PrettyStackTraceHead@got@tlsld@l ; CHECK: bl __tls_get_addr(_ZL20PrettyStackTraceHead@tlsld) -; CHECK: addis 3, 3, _ZL20PrettyStackTraceHead@dtprel@ha -; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3) -; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3) +; CHECK: addis [[REG2:[0-9]+]], 3, _ZL20PrettyStackTraceHead@dtprel@ha +; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l([[REG2]]) +; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l([[REG2]]) diff --git a/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll b/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll --- a/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll +++ b/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll @@ -100,24 +100,20 @@ ; ; RV64IZFINXZDINX-LABEL: func: ; RV64IZFINXZDINX: # %bb.0: # %entry -; RV64IZFINXZDINX-NEXT: addi sp, sp, -16 -; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64IZFINXZDINX-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; RV64IZFINXZDINX-NEXT: sext.w a2, a1 -; RV64IZFINXZDINX-NEXT: mv s0, a0 ; RV64IZFINXZDINX-NEXT: beqz a2, .LBB0_2 ; RV64IZFINXZDINX-NEXT: # %bb.1: # %if.else +; RV64IZFINXZDINX-NEXT: addi sp, sp, -16 +; RV64IZFINXZDINX-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IZFINXZDINX-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; RV64IZFINXZDINX-NEXT: addiw a1, a1, -1 -; RV64IZFINXZDINX-NEXT: mv a0, s0 +; RV64IZFINXZDINX-NEXT: mv s0, a0 ; RV64IZFINXZDINX-NEXT: call func@plt ; RV64IZFINXZDINX-NEXT: fadd.d a0, a0, s0 -; RV64IZFINXZDINX-NEXT: j .LBB0_3 -; RV64IZFINXZDINX-NEXT: .LBB0_2: # %return -; RV64IZFINXZDINX-NEXT: mv a0, s0 -; RV64IZFINXZDINX-NEXT: .LBB0_3: # %return ; RV64IZFINXZDINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64IZFINXZDINX-NEXT: addi sp, sp, 16 +; RV64IZFINXZDINX-NEXT: .LBB0_2: # %return ; RV64IZFINXZDINX-NEXT: ret entry: %cmp = icmp eq i32 %n, 0 diff --git a/llvm/test/CodeGen/RISCV/fmax-fmin.ll b/llvm/test/CodeGen/RISCV/fmax-fmin.ll --- a/llvm/test/CodeGen/RISCV/fmax-fmin.ll +++ b/llvm/test/CodeGen/RISCV/fmax-fmin.ll @@ -99,18 +99,17 @@ ; R32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; R32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; R32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; R32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; R32-NEXT: mv s1, a3 ; R32-NEXT: mv s2, a2 ; R32-NEXT: mv s0, a1 -; R32-NEXT: mv s4, a0 +; R32-NEXT: mv s3, a0 ; R32-NEXT: call __gtdf2@plt -; R32-NEXT: mv s3, s4 -; R32-NEXT: bgtz a0, .LBB3_2 +; R32-NEXT: mv a1, a0 +; R32-NEXT: mv a0, s3 +; R32-NEXT: bgtz a1, .LBB3_2 ; R32-NEXT: # %bb.1: ; R32-NEXT: mv s3, s2 ; R32-NEXT: .LBB3_2: -; R32-NEXT: mv a0, s4 ; R32-NEXT: mv a1, s0 ; R32-NEXT: mv a2, s2 ; R32-NEXT: mv a3, s1 @@ -126,7 +125,6 @@ ; R32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; R32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; R32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; R32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; R32-NEXT: addi sp, sp, 32 ; R32-NEXT: ret ; @@ -250,18 +248,17 @@ ; R32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; R32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; R32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; R32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; R32-NEXT: mv s1, a3 ; R32-NEXT: mv s2, a2 ; R32-NEXT: mv s0, a1 -; R32-NEXT: mv s4, a0 +; R32-NEXT: mv s3, a0 ; R32-NEXT: call __ltdf2@plt -; R32-NEXT: mv s3, s4 -; R32-NEXT: bltz a0, .LBB7_2 +; R32-NEXT: mv a1, a0 +; R32-NEXT: mv a0, s3 +; R32-NEXT: bltz a1, .LBB7_2 ; R32-NEXT: # %bb.1: ; R32-NEXT: mv s3, s2 ; R32-NEXT: .LBB7_2: -; R32-NEXT: mv a0, s4 ; R32-NEXT: mv a1, s0 ; R32-NEXT: mv a2, s2 ; R32-NEXT: mv a3, s1 @@ -277,7 +274,6 @@ ; R32-NEXT: lw s1, 20(sp) # 4-byte Folded Reload ; R32-NEXT: lw s2, 16(sp) # 4-byte Folded Reload ; R32-NEXT: lw s3, 12(sp) # 4-byte Folded Reload -; R32-NEXT: lw s4, 8(sp) # 4-byte Folded Reload ; R32-NEXT: addi sp, sp, 32 ; R32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ -1002,17 +1002,17 @@ define i1 @usubo_ult_sub_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) { ; RV32-LABEL: usubo_ult_sub_dominates_i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: andi a7, a5, 1 -; RV32-NEXT: beqz a7, .LBB31_5 +; RV32-NEXT: andi a6, a5, 1 +; RV32-NEXT: beqz a6, .LBB31_5 ; RV32-NEXT: # %bb.1: # %t -; RV32-NEXT: mv a6, a0 +; RV32-NEXT: mv a7, a0 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: sub t0, a1, a3 ; RV32-NEXT: sub t0, t0, a0 -; RV32-NEXT: sub a2, a6, a2 +; RV32-NEXT: sub a2, a7, a2 ; RV32-NEXT: sw a2, 0(a4) ; RV32-NEXT: sw t0, 4(a4) -; RV32-NEXT: beqz a7, .LBB31_5 +; RV32-NEXT: beqz a6, .LBB31_5 ; RV32-NEXT: # %bb.2: # %end ; RV32-NEXT: beq a1, a3, .LBB31_4 ; RV32-NEXT: # %bb.3: # %end diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -414,7 +414,7 @@ ; CHECK-LABEL: strided_vpload_v32f64: ; CHECK: # %bb.0: ; CHECK-NEXT: li a4, 16 -; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: mv a3, a2 ; CHECK-NEXT: bltu a2, a4, .LBB33_2 ; CHECK-NEXT: # %bb.1: @@ -427,11 +427,12 @@ ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a5 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v8, 2 +; CHECK-NEXT: vslidedown.vi v8, v9, 2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vlse64.v v16, (a4), a1, v0.t ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-NEXT: ret %load = call <32 x double> @llvm.experimental.vp.strided.load.v32f64.p0.i32(ptr %ptr, i32 %stride, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -765,7 +765,7 @@ define @strided_load_nxv16f64(ptr %ptr, i64 %stride, %mask, i32 zeroext %evl) { ; CHECK-RV32-LABEL: strided_load_nxv16f64: ; CHECK-RV32: # %bb.0: -; CHECK-RV32-NEXT: vmv1r.v v8, v0 +; CHECK-RV32-NEXT: vmv1r.v v9, v0 ; CHECK-RV32-NEXT: csrr a4, vlenb ; CHECK-RV32-NEXT: sub a2, a3, a4 ; CHECK-RV32-NEXT: sltu a5, a3, a2 @@ -779,17 +779,18 @@ ; CHECK-RV32-NEXT: add a5, a0, a5 ; CHECK-RV32-NEXT: srli a4, a4, 3 ; CHECK-RV32-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a4 +; CHECK-RV32-NEXT: vslidedown.vx v8, v9, a4 ; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-RV32-NEXT: vmv1r.v v0, v8 ; CHECK-RV32-NEXT: vlse64.v v16, (a5), a1, v0.t ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-RV32-NEXT: vmv1r.v v0, v8 +; CHECK-RV32-NEXT: vmv1r.v v0, v9 ; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_load_nxv16f64: ; CHECK-RV64: # %bb.0: -; CHECK-RV64-NEXT: vmv1r.v v8, v0 +; CHECK-RV64-NEXT: vmv1r.v v9, v0 ; CHECK-RV64-NEXT: csrr a4, vlenb ; CHECK-RV64-NEXT: sub a3, a2, a4 ; CHECK-RV64-NEXT: sltu a5, a2, a3 @@ -803,11 +804,12 @@ ; CHECK-RV64-NEXT: add a5, a0, a5 ; CHECK-RV64-NEXT: srli a4, a4, 3 ; CHECK-RV64-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a4 +; CHECK-RV64-NEXT: vslidedown.vx v8, v9, a4 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-RV64-NEXT: vmv1r.v v0, v8 ; CHECK-RV64-NEXT: vlse64.v v16, (a5), a1, v0.t ; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-RV64-NEXT: vmv1r.v v0, v8 +; CHECK-RV64-NEXT: vmv1r.v v0, v9 ; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-RV64-NEXT: ret %v = call @llvm.experimental.vp.strided.load.nxv16f64.p0.i64(ptr %ptr, i64 %stride, %mask, i32 %evl) diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll --- a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll @@ -33,7 +33,8 @@ if.end: ; preds = %entry %mul = mul nsw i32 %a, 11 - %sub = sub nsw i32 %mul, %b + %xor = xor i32 %mul, 100 + %sub = sub nsw i32 %xor, %b %call = tail call i32 @h(i32 %sub, i32 %b) %add = add nsw i32 %call, %b %mul1 = mul nsw i32 %add, %call @@ -81,7 +82,8 @@ if.end: ; preds = %entry %mul = mul nsw i32 %a, 11 - %sub = sub nsw i32 %mul, %b + %xor = xor i32 %mul, 100 + %sub = sub nsw i32 %xor, %b %call = tail call i32 @h(i32 %sub, i32 %b) %add = add nsw i32 %call, %b %mul1 = mul nsw i32 %add, %call diff --git a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll --- a/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll +++ b/llvm/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll @@ -12,10 +12,10 @@ ; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl %esi -; CHECK-NEXT: movl %edx, %esi +; CHECK-NEXT: pushl %eax ; CHECK-NEXT: movb $1, %cl ; CHECK-NEXT: movl $1, %ebx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi ; CHECK-NEXT: .p2align 4, 0x90 ; CHECK-NEXT: .LBB0_1: # %bb.i5 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -25,11 +25,11 @@ ; CHECK-NEXT: testb $1, %al ; CHECK-NEXT: jne .LBB0_1 ; CHECK-NEXT: # %bb.2: # %mp_unexp_mp2d.exit.i -; CHECK-NEXT: je .LBB0_10 -; CHECK-NEXT: # %bb.3: # %cond_next.i +; CHECK-NEXT: je .LBB0_3 +; CHECK-NEXT: # %bb.5: # %cond_next.i ; CHECK-NEXT: testb $1, %al -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.4: # %cond_next36.i +; CHECK-NEXT: jne .LBB0_3 +; CHECK-NEXT: # %bb.6: # %cond_next36.i ; CHECK-NEXT: movl $0, 0 ; CHECK-NEXT: movzbl %al, %ebp ; CHECK-NEXT: andl $1, %ebp @@ -38,25 +38,26 @@ ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: xorpd %xmm1, %xmm1 ; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_5: # %bb.i28.i +; CHECK-NEXT: .LBB0_7: # %bb.i28.i ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: cvttsd2si %xmm1, %edx -; CHECK-NEXT: cmpl %esi, %edx -; CHECK-NEXT: cmovgel %eax, %edx +; CHECK-NEXT: cvttsd2si %xmm1, %edi +; CHECK-NEXT: cmpl %edx, %edi +; CHECK-NEXT: cmovgel %eax, %edi ; CHECK-NEXT: addl $2, %ecx ; CHECK-NEXT: xorps %xmm2, %xmm2 -; CHECK-NEXT: cvtsi2sd %edx, %xmm2 +; CHECK-NEXT: cvtsi2sd %edi, %xmm2 ; CHECK-NEXT: xorpd %xmm1, %xmm1 ; CHECK-NEXT: subsd %xmm2, %xmm1 ; CHECK-NEXT: mulsd %xmm0, %xmm1 ; CHECK-NEXT: addl $-2, %ebp -; CHECK-NEXT: jne .LBB0_5 -; CHECK-NEXT: # %bb.6: # %mp_unexp_d2mp.exit29.i +; CHECK-NEXT: jne .LBB0_7 +; CHECK-NEXT: # %bb.8: # %mp_unexp_d2mp.exit29.i ; CHECK-NEXT: movl $0, 0 -; CHECK-NEXT: je .LBB0_7 -; CHECK-NEXT: # %bb.8: # %mp_sqrt_init.exit +; CHECK-NEXT: je .LBB0_9 +; CHECK-NEXT: # %bb.10: # %mp_sqrt_init.exit ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: movl %edx, %edi +; CHECK-NEXT: movl %esi, %edx ; CHECK-NEXT: calll mp_mul_csqu@PLT ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: movl $-1, %edx @@ -66,17 +67,31 @@ ; CHECK-NEXT: calll rdft@PLT ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: xorl %ecx, %ecx -; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: pushl %eax +; CHECK-NEXT: movl %edi, (%esp) # 4-byte Spill +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi ; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %esi ; CHECK-NEXT: pushl $0 ; CHECK-NEXT: calll mp_mul_d2i@PLT ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: testl %ebp, %ebp -; CHECK-NEXT: jne .LBB0_10 -; CHECK-NEXT: # %bb.9: # %cond_false.i +; CHECK-NEXT: je .LBB0_11 +; CHECK-NEXT: .LBB0_3: # %cond_true.i +; CHECK-NEXT: addl $4, %esp +; CHECK-NEXT: .LBB0_4: # %cond_true.i +; CHECK-NEXT: popl %esi +; CHECK-NEXT: popl %edi +; CHECK-NEXT: popl %ebx +; CHECK-NEXT: popl %ebp +; CHECK-NEXT: retl +; CHECK-NEXT: .p2align 4, 0x90 +; CHECK-NEXT: .LBB0_9: # %bb.i.i +; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: jmp .LBB0_9 +; CHECK-NEXT: .LBB0_11: # %cond_false.i ; CHECK-NEXT: xorl %ecx, %ecx +; CHECK-NEXT: movl (%esp), %esi # 4-byte Reload ; CHECK-NEXT: movl %esi, %edx ; CHECK-NEXT: pushl {{[0-9]+}}(%esp) ; CHECK-NEXT: pushl $0 @@ -84,43 +99,33 @@ ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edi -; CHECK-NEXT: pushl %edi ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ebp ; CHECK-NEXT: pushl %ebp ; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebp ; CHECK-NEXT: calll mp_add@PLT ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: pushl %ebp -; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl {{[0-9]+}}(%esp) ; CHECK-NEXT: calll mp_sub@PLT ; CHECK-NEXT: addl $12, %esp ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: pushl %edi +; CHECK-NEXT: pushl %ebp ; CHECK-NEXT: pushl $0 ; CHECK-NEXT: calll mp_round@PLT ; CHECK-NEXT: addl $8, %esp ; CHECK-NEXT: xorl %ecx, %ecx ; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: pushl %ebp +; CHECK-NEXT: pushl %edi ; CHECK-NEXT: pushl {{[0-9]+}}(%esp) ; CHECK-NEXT: pushl %ebx ; CHECK-NEXT: calll mp_mul_d2i@PLT -; CHECK-NEXT: addl $12, %esp -; CHECK-NEXT: .LBB0_10: # %cond_true.i -; CHECK-NEXT: popl %esi -; CHECK-NEXT: popl %edi -; CHECK-NEXT: popl %ebx -; CHECK-NEXT: popl %ebp -; CHECK-NEXT: retl -; CHECK-NEXT: .p2align 4, 0x90 -; CHECK-NEXT: .LBB0_7: # %bb.i.i -; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: jmp .LBB0_7 +; CHECK-NEXT: addl $16, %esp +; CHECK-NEXT: jmp .LBB0_4 entry: br label %bb.i5 diff --git a/llvm/test/CodeGen/X86/cgp-usubo.ll b/llvm/test/CodeGen/X86/cgp-usubo.ll --- a/llvm/test/CodeGen/X86/cgp-usubo.ll +++ b/llvm/test/CodeGen/X86/cgp-usubo.ll @@ -172,11 +172,11 @@ ; CHECK-NEXT: je .LBB9_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movq %rdx, %rbx -; CHECK-NEXT: movq %rsi, %r15 ; CHECK-NEXT: movq %rdi, %r14 ; CHECK-NEXT: xorl %edi, %edi ; CHECK-NEXT: cmpq %rsi, %r14 ; CHECK-NEXT: setb %dil +; CHECK-NEXT: movq %rsi, %r15 ; CHECK-NEXT: callq call@PLT ; CHECK-NEXT: subq %r15, %r14 ; CHECK-NEXT: jae .LBB9_2 diff --git a/llvm/test/CodeGen/X86/csr-split.ll b/llvm/test/CodeGen/X86/csr-split.ll --- a/llvm/test/CodeGen/X86/csr-split.ll +++ b/llvm/test/CodeGen/X86/csr-split.ll @@ -66,23 +66,20 @@ define dso_local signext i32 @test2(ptr %p1) local_unnamed_addr { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset %rbx, -16 ; CHECK-NEXT: testq %rdi, %rdi ; CHECK-NEXT: je .LBB1_2 ; CHECK-NEXT: # %bb.1: # %if.end -; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: movslq a(%rip), %rax ; CHECK-NEXT: cmpq %rdi, %rax ; CHECK-NEXT: je .LBB1_3 ; CHECK-NEXT: .LBB1_2: # %return ; CHECK-NEXT: xorl %eax, %eax -; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq ; CHECK-NEXT: .LBB1_3: # %if.then2 +; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: callq callVoid@PLT ; CHECK-NEXT: movq %rbx, %rdi ; CHECK-NEXT: popq %rbx diff --git a/llvm/test/CodeGen/X86/ragreedy-bug.ll b/llvm/test/CodeGen/X86/ragreedy-bug.ll --- a/llvm/test/CodeGen/X86/ragreedy-bug.ll +++ b/llvm/test/CodeGen/X86/ragreedy-bug.ll @@ -22,13 +22,13 @@ ; CHECK: cond.false.i.i ; CHECK: maskrune ; CHECK-NEXT: movzbl -; CHECK-NEXT: movzbl +; CHECK-NEXT: movq ; CHECK-NEXT: testl ; CHECK-NEXT: je ; CHECK: cond.false.i.i219 ; CHECK: maskrune ; CHECK-NEXT: movzbl -; CHECK-NEXT: movzbl +; CHECK-NEXT: movq ; CHECK-NEXT: testl ; CHECK-NEXT: jne diff --git a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll --- a/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll +++ b/llvm/test/CodeGen/X86/ragreedy-hoist-spill.ll @@ -65,18 +65,20 @@ ; CHECK-NEXT: testb %al, %al ; CHECK-NEXT: je LBB0_54 ; CHECK-NEXT: ## %bb.6: ## %SyTime.exit2720 -; CHECK-NEXT: movq %rdx, %r14 -; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rax ; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rcx ; CHECK-NEXT: cmpq %rax, %rcx ; CHECK-NEXT: jae LBB0_8 ; CHECK-NEXT: ## %bb.7: ## %for.body.lr.ph +; CHECK-NEXT: movq %rdx, %rbx ; CHECK-NEXT: movl $512, %edx ## imm = 0x200 ; CHECK-NEXT: movl $32, %esi +; CHECK-NEXT: movq %rdi, %r14 ; CHECK-NEXT: callq _memset +; CHECK-NEXT: movq %r14, %rdi +; CHECK-NEXT: movq %rbx, %rdx ; CHECK-NEXT: LBB0_8: ## %while.body.preheader -; CHECK-NEXT: imulq $1040, %r14, %rax ## imm = 0x410 +; CHECK-NEXT: imulq $1040, %rdx, %rax ## imm = 0x410 ; CHECK-NEXT: movq _syBuf@GOTPCREL(%rip), %rcx ; CHECK-NEXT: leaq 8(%rcx,%rax), %rdx ; CHECK-NEXT: movl $1, %r13d @@ -90,7 +92,7 @@ ; CHECK-NEXT: jne LBB0_9 ; CHECK-NEXT: ## %bb.10: ## %do.end ; CHECK-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill -; CHECK-NEXT: movq %rbx, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill +; CHECK-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) ## 8-byte Spill ; CHECK-NEXT: xorl %ebp, %ebp ; CHECK-NEXT: testb %bpl, %bpl ; CHECK-NEXT: jne LBB0_11 diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening.ll b/llvm/test/CodeGen/X86/speculative-load-hardening.ll --- a/llvm/test/CodeGen/X86/speculative-load-hardening.ll +++ b/llvm/test/CodeGen/X86/speculative-load-hardening.ll @@ -42,16 +42,16 @@ ; X64-NEXT: .cfi_offset %r14, -24 ; X64-NEXT: .cfi_offset %r15, -16 ; X64-NEXT: movq %rsp, %rax -; X64-NEXT: movq $-1, %r14 +; X64-NEXT: movq $-1, %rbx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: testl %edi, %edi ; X64-NEXT: jne .LBB1_1 ; X64-NEXT: # %bb.2: # %then1 -; X64-NEXT: cmovneq %r14, %rax +; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: testl %esi, %esi ; X64-NEXT: je .LBB1_4 ; X64-NEXT: .LBB1_1: -; X64-NEXT: cmoveq %r14, %rax +; X64-NEXT: cmoveq %rbx, %rax ; X64-NEXT: .LBB1_8: # %exit ; X64-NEXT: shlq $47, %rax ; X64-NEXT: orq %rax, %rsp @@ -64,42 +64,43 @@ ; X64-NEXT: retq ; X64-NEXT: .LBB1_4: # %then2 ; X64-NEXT: .cfi_def_cfa_offset 32 -; X64-NEXT: movq %r8, %rbx -; X64-NEXT: cmovneq %r14, %rax +; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: testl %edx, %edx ; X64-NEXT: je .LBB1_6 ; X64-NEXT: # %bb.5: # %else3 -; X64-NEXT: cmoveq %r14, %rax +; X64-NEXT: cmoveq %rbx, %rax ; X64-NEXT: movslq (%r9), %rcx ; X64-NEXT: orq %rax, %rcx -; X64-NEXT: leaq (%rbx,%rcx,4), %r15 -; X64-NEXT: movl %ecx, (%rbx,%rcx,4) +; X64-NEXT: leaq (%r8,%rcx,4), %r14 +; X64-NEXT: movl %ecx, (%r8,%rcx,4) ; X64-NEXT: jmp .LBB1_7 ; X64-NEXT: .LBB1_6: # %then3 -; X64-NEXT: cmovneq %r14, %rax +; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: movl (%rcx), %ecx -; X64-NEXT: addl (%rbx), %ecx +; X64-NEXT: addl (%r8), %ecx ; X64-NEXT: movslq %ecx, %rdi ; X64-NEXT: orq %rax, %rdi -; X64-NEXT: movl (%rbx,%rdi,4), %esi +; X64-NEXT: movl (%r8,%rdi,4), %esi ; X64-NEXT: orl %eax, %esi -; X64-NEXT: movq (%r9), %r15 -; X64-NEXT: orq %rax, %r15 -; X64-NEXT: addl (%r15), %esi +; X64-NEXT: movq (%r9), %r14 +; X64-NEXT: orq %rax, %r14 +; X64-NEXT: addl (%r14), %esi ; X64-NEXT: shlq $47, %rax ; X64-NEXT: # kill: def $edi killed $edi killed $rdi ; X64-NEXT: orq %rax, %rsp +; X64-NEXT: movq %r8, %r15 ; X64-NEXT: callq leak@PLT ; X64-NEXT: .Lslh_ret_addr0: +; X64-NEXT: movq %r15, %r8 ; X64-NEXT: movq %rsp, %rax ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx ; X64-NEXT: sarq $63, %rax ; X64-NEXT: cmpq $.Lslh_ret_addr0, %rcx -; X64-NEXT: cmovneq %r14, %rax +; X64-NEXT: cmovneq %rbx, %rax ; X64-NEXT: .LBB1_7: # %merge -; X64-NEXT: movslq (%r15), %rcx +; X64-NEXT: movslq (%r14), %rcx ; X64-NEXT: orq %rax, %rcx -; X64-NEXT: movl $0, (%rbx,%rcx,4) +; X64-NEXT: movl $0, (%r8,%rcx,4) ; X64-NEXT: jmp .LBB1_8 ; ; X64-LFENCE-LABEL: test_basic_conditions: @@ -119,29 +120,30 @@ ; X64-LFENCE-NEXT: testl %esi, %esi ; X64-LFENCE-NEXT: jne .LBB1_6 ; X64-LFENCE-NEXT: # %bb.2: # %then2 -; X64-LFENCE-NEXT: movq %r8, %rbx ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: testl %edx, %edx ; X64-LFENCE-NEXT: je .LBB1_3 ; X64-LFENCE-NEXT: # %bb.4: # %else3 ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: movslq (%r9), %rax -; X64-LFENCE-NEXT: leaq (%rbx,%rax,4), %r14 -; X64-LFENCE-NEXT: movl %eax, (%rbx,%rax,4) +; X64-LFENCE-NEXT: leaq (%r8,%rax,4), %rbx +; X64-LFENCE-NEXT: movl %eax, (%r8,%rax,4) ; X64-LFENCE-NEXT: jmp .LBB1_5 ; X64-LFENCE-NEXT: .LBB1_3: # %then3 ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: movl (%rcx), %eax -; X64-LFENCE-NEXT: addl (%rbx), %eax +; X64-LFENCE-NEXT: addl (%r8), %eax ; X64-LFENCE-NEXT: movslq %eax, %rdi -; X64-LFENCE-NEXT: movl (%rbx,%rdi,4), %esi -; X64-LFENCE-NEXT: movq (%r9), %r14 -; X64-LFENCE-NEXT: addl (%r14), %esi +; X64-LFENCE-NEXT: movl (%r8,%rdi,4), %esi +; X64-LFENCE-NEXT: movq (%r9), %rbx +; X64-LFENCE-NEXT: addl (%rbx), %esi ; X64-LFENCE-NEXT: # kill: def $edi killed $edi killed $rdi +; X64-LFENCE-NEXT: movq %r8, %r14 ; X64-LFENCE-NEXT: callq leak@PLT +; X64-LFENCE-NEXT: movq %r14, %r8 ; X64-LFENCE-NEXT: .LBB1_5: # %merge -; X64-LFENCE-NEXT: movslq (%r14), %rax -; X64-LFENCE-NEXT: movl $0, (%rbx,%rax,4) +; X64-LFENCE-NEXT: movslq (%rbx), %rax +; X64-LFENCE-NEXT: movl $0, (%r8,%rax,4) ; X64-LFENCE-NEXT: .LBB1_6: # %exit ; X64-LFENCE-NEXT: lfence ; X64-LFENCE-NEXT: addq $8, %rsp @@ -501,6 +503,142 @@ declare void @__cxa_throw(ptr, ptr, ptr) local_unnamed_addr define void @test_basic_eh(i32 %a, ptr %ptr1, ptr %ptr2) speculative_load_hardening personality ptr @__gxx_personality_v0 { +; X64-LABEL: test_basic_eh: +; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbp +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: pushq %r15 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: pushq %r14 +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: pushq %rbx +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 48 +; X64-NEXT: .cfi_offset %rbx, -40 +; X64-NEXT: .cfi_offset %r14, -32 +; X64-NEXT: .cfi_offset %r15, -24 +; X64-NEXT: .cfi_offset %rbp, -16 +; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq $-1, %rbx +; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpl $41, %edi +; X64-NEXT: jg .LBB4_1 +; X64-NEXT: # %bb.2: # %thrower +; X64-NEXT: movq %rdx, %r14 +; X64-NEXT: cmovgq %rbx, %rax +; X64-NEXT: movslq %edi, %rcx +; X64-NEXT: movq %rsi, %r15 +; X64-NEXT: movl (%rsi,%rcx,4), %ebp +; X64-NEXT: orl %eax, %ebp +; X64-NEXT: movl $4, %edi +; X64-NEXT: shlq $47, %rax +; X64-NEXT: orq %rax, %rsp +; X64-NEXT: callq __cxa_allocate_exception@PLT +; X64-NEXT: .Lslh_ret_addr4: +; X64-NEXT: movq %rsp, %rcx +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: cmpq $.Lslh_ret_addr4, %rdx +; X64-NEXT: cmovneq %rbx, %rcx +; X64-NEXT: movl %ebp, (%rax) +; X64-NEXT: .Ltmp0: +; X64-NEXT: shlq $47, %rcx +; X64-NEXT: movq %rax, %rdi +; X64-NEXT: xorl %esi, %esi +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: orq %rcx, %rsp +; X64-NEXT: callq __cxa_throw@PLT +; X64-NEXT: .Lslh_ret_addr5: +; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr5, %rcx +; X64-NEXT: cmovneq %rbx, %rax +; X64-NEXT: .Ltmp1: +; X64-NEXT: jmp .LBB4_3 +; X64-NEXT: .LBB4_1: +; X64-NEXT: cmovleq %rbx, %rax +; X64-NEXT: .LBB4_3: # %exit +; X64-NEXT: shlq $47, %rax +; X64-NEXT: orq %rax, %rsp +; X64-NEXT: addq $8, %rsp +; X64-NEXT: .cfi_def_cfa_offset 40 +; X64-NEXT: popq %rbx +; X64-NEXT: .cfi_def_cfa_offset 32 +; X64-NEXT: popq %r14 +; X64-NEXT: .cfi_def_cfa_offset 24 +; X64-NEXT: popq %r15 +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: popq %rbp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; X64-NEXT: .LBB4_4: # %lpad +; X64-NEXT: .cfi_def_cfa_offset 48 +; X64-NEXT: .Ltmp2: +; X64-NEXT: movq %rsp, %rcx +; X64-NEXT: sarq $63, %rcx +; X64-NEXT: movl (%rax), %eax +; X64-NEXT: addl (%r15), %eax +; X64-NEXT: cltq +; X64-NEXT: orq %rcx, %rax +; X64-NEXT: movl (%r14,%rax,4), %edi +; X64-NEXT: orl %ecx, %edi +; X64-NEXT: shlq $47, %rcx +; X64-NEXT: orq %rcx, %rsp +; X64-NEXT: callq sink@PLT +; X64-NEXT: .Lslh_ret_addr6: +; X64-NEXT: movq %rsp, %rax +; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx +; X64-NEXT: sarq $63, %rax +; X64-NEXT: cmpq $.Lslh_ret_addr6, %rcx +; X64-NEXT: cmovneq %rbx, %rax +; +; X64-LFENCE-LABEL: test_basic_eh: +; X64-LFENCE: # %bb.0: # %entry +; X64-LFENCE-NEXT: pushq %rbp +; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16 +; X64-LFENCE-NEXT: pushq %r14 +; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24 +; X64-LFENCE-NEXT: pushq %rbx +; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32 +; X64-LFENCE-NEXT: .cfi_offset %rbx, -32 +; X64-LFENCE-NEXT: .cfi_offset %r14, -24 +; X64-LFENCE-NEXT: .cfi_offset %rbp, -16 +; X64-LFENCE-NEXT: cmpl $41, %edi +; X64-LFENCE-NEXT: jg .LBB4_2 +; X64-LFENCE-NEXT: # %bb.1: # %thrower +; X64-LFENCE-NEXT: movq %rdx, %rbx +; X64-LFENCE-NEXT: movq %rsi, %r14 +; X64-LFENCE-NEXT: lfence +; X64-LFENCE-NEXT: movslq %edi, %rax +; X64-LFENCE-NEXT: movl (%rsi,%rax,4), %ebp +; X64-LFENCE-NEXT: movl $4, %edi +; X64-LFENCE-NEXT: callq __cxa_allocate_exception@PLT +; X64-LFENCE-NEXT: movl %ebp, (%rax) +; X64-LFENCE-NEXT: .Ltmp0: +; X64-LFENCE-NEXT: movq %rax, %rdi +; X64-LFENCE-NEXT: xorl %esi, %esi +; X64-LFENCE-NEXT: xorl %edx, %edx +; X64-LFENCE-NEXT: callq __cxa_throw@PLT +; X64-LFENCE-NEXT: .Ltmp1: +; X64-LFENCE-NEXT: .LBB4_2: # %exit +; X64-LFENCE-NEXT: lfence +; X64-LFENCE-NEXT: popq %rbx +; X64-LFENCE-NEXT: .cfi_def_cfa_offset 24 +; X64-LFENCE-NEXT: popq %r14 +; X64-LFENCE-NEXT: .cfi_def_cfa_offset 16 +; X64-LFENCE-NEXT: popq %rbp +; X64-LFENCE-NEXT: .cfi_def_cfa_offset 8 +; X64-LFENCE-NEXT: retq +; X64-LFENCE-NEXT: .LBB4_3: # %lpad +; X64-LFENCE-NEXT: .cfi_def_cfa_offset 32 +; X64-LFENCE-NEXT: .Ltmp2: +; X64-LFENCE-NEXT: movl (%rax), %eax +; X64-LFENCE-NEXT: addl (%r14), %eax +; X64-LFENCE-NEXT: cltq +; X64-LFENCE-NEXT: movl (%rbx,%rax,4), %edi +; X64-LFENCE-NEXT: callq sink@PLT entry: %a.cmp = icmp slt i32 %a, 42 br i1 %a.cmp, label %thrower, label %exit diff --git a/llvm/test/CodeGen/X86/split-reg-with-hint.ll b/llvm/test/CodeGen/X86/split-reg-with-hint.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/split-reg-with-hint.ll @@ -0,0 +1,80 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s + +; %ptr has a hint to %rdi in entry block, it also has a interference with %rdi +; in block if.then. It should be split in cold block if.then. +; Similarly %p2, %p3, %p4, %p5 and %p6 should also be split in cold block +; if.then. + +define ptr @foo(ptr %ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6) { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: testq %rdi, %rdi +; CHECK-NEXT: je .LBB0_1 +; CHECK-NEXT: # %bb.2: # %if.end +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: jmp qux@PLT # TAILCALL +; CHECK-NEXT: .LBB0_1: # %if.then +; CHECK-NEXT: pushq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: pushq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: pushq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset %rbx, -48 +; CHECK-NEXT: .cfi_offset %r12, -40 +; CHECK-NEXT: .cfi_offset %r13, -32 +; CHECK-NEXT: .cfi_offset %r14, -24 +; CHECK-NEXT: .cfi_offset %r15, -16 +; CHECK-NEXT: movq %rsi, %rbx +; CHECK-NEXT: movq %rdx, %r14 +; CHECK-NEXT: movq %rcx, %r15 +; CHECK-NEXT: movq %r8, %r12 +; CHECK-NEXT: movq %r9, %r13 +; CHECK-NEXT: callq bar@PLT +; CHECK-NEXT: movq %rbx, %rsi +; CHECK-NEXT: movq %r14, %rdx +; CHECK-NEXT: movq %r15, %rcx +; CHECK-NEXT: movq %r12, %r8 +; CHECK-NEXT: movq %r13, %r9 +; CHECK-NEXT: movq %rax, %rdi +; CHECK-NEXT: popq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; CHECK-NEXT: popq %r12 +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: popq %r13 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: popq %r14 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: popq %r15 +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_restore %rbx +; CHECK-NEXT: .cfi_restore %r12 +; CHECK-NEXT: .cfi_restore %r13 +; CHECK-NEXT: .cfi_restore %r14 +; CHECK-NEXT: .cfi_restore %r15 +; CHECK-NEXT: incq %rdi +; CHECK-NEXT: jmp qux@PLT # TAILCALL +entry: + %tobool.not = icmp eq ptr %ptr, null + br i1 %tobool.not, label %if.then, label %if.end, !prof !5 + +if.then: ; preds = %entry + %call = tail call ptr @bar(ptr %ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6) + br label %if.end + +if.end: ; preds = %if.then, %entry + %ptr.addr.0 = phi ptr [ %call, %if.then ], [ %ptr, %entry ] + %incdec.ptr = getelementptr inbounds i8, ptr %ptr.addr.0, i64 1 + %call2 = tail call ptr @qux(ptr %incdec.ptr, i64 %p2, i64 %p3, i64 %p4, i64 %p5, i64 %p6) + ret ptr %call2 +} + +!5 = !{!"branch_weights", i32 1, i32 2000} + +declare ptr @bar(ptr, i64, i64, i64, i64, i64) +declare ptr @qux(ptr, i64, i64, i64, i64, i64) diff --git a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll --- a/llvm/test/CodeGen/X86/statepoint-call-lowering.ll +++ b/llvm/test/CodeGen/X86/statepoint-call-lowering.ll @@ -158,34 +158,29 @@ define i1 @test_cross_bb(ptr addrspace(1) %a, i1 %external_cond) gc "statepoint-example" { ; CHECK-LABEL: test_cross_bb: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rbp -; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: pushq %rbx -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: subq $16, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset %rbx, -24 -; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movl %esi, %ebp -; CHECK-NEXT: movq %rdi, (%rsp) +; CHECK-NEXT: .cfi_offset %rbx, -16 +; CHECK-NEXT: movl %esi, %ebx +; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq return_i1@PLT ; CHECK-NEXT: .Ltmp8: -; CHECK-NEXT: testb $1, %bpl +; CHECK-NEXT: testb $1, %bl ; CHECK-NEXT: je .LBB8_2 ; CHECK-NEXT: # %bb.1: # %left +; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rdi ; CHECK-NEXT: movl %eax, %ebx -; CHECK-NEXT: movq (%rsp), %rdi ; CHECK-NEXT: callq consume@PLT ; CHECK-NEXT: movl %ebx, %eax ; CHECK-NEXT: jmp .LBB8_3 ; CHECK-NEXT: .LBB8_2: # %right ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: .LBB8_3: # %right -; CHECK-NEXT: addq $8, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: addq $16, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: popq %rbp +; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq entry: diff --git a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir --- a/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir +++ b/llvm/test/CodeGen/X86/statepoint-invoke-ra-enter-at-end.mir @@ -296,7 +296,8 @@ ; CHECK-NEXT: successors: {{$}} ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp - ; CHECK-NEXT: dead [[COPY1]]:gr64, dead [[COPY]]:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @barney, undef $rdi, 2, 0, 2, 0, 2, 45, 2, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 1, 2, 71, 2, 0, 2, 5, 2, 0, 2, 0, [[COPY1]], 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 2, 2, 5, 2, 1, 2, 0, 2, 2, 2, 0, 2, 0, [[COPY]], 2, 7, 2, 0, 2, 1, 2, 6, 2, 0, 2, 0, 2, 1, 2, 1, 2, 0, [[COPY]], 2, 8, 2, 10, 2, 2, [[COPY1]](tied-def 0), [[COPY]](tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64 = COPY [[COPY1]] + ; CHECK-NEXT: dead [[COPY2]]:gr64, dead [[COPY]]:gr64 = STATEPOINT 2882400000, 0, 1, target-flags(x86-plt) @barney, undef $rdi, 2, 0, 2, 0, 2, 45, 2, 0, 2, 2, 2, 0, 2, 0, 2, 0, 2, 1, 2, 0, 2, 7, 2, 0, 2, 2, 2, 1, 2, 71, 2, 0, 2, 5, 2, 0, 2, 0, [[COPY2]], 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 2, 2, 5, 2, 1, 2, 0, 2, 2, 2, 0, 2, 0, [[COPY]], 2, 7, 2, 0, 2, 1, 2, 6, 2, 0, 2, 0, 2, 1, 2, 1, 2, 0, [[COPY]], 2, 8, 2, 10, 2, 2, [[COPY2]](tied-def 0), [[COPY]](tied-def 1), 2, 0, 2, 2, 0, 0, 1, 1, csr_64, implicit-def $rsp, implicit-def $ssp ; CHECK-NEXT: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.4.bb17: @@ -348,9 +349,9 @@ ; CHECK-NEXT: [[MOV64rm2:%[0-9]+]]:gr64 = MOV64rm undef %59:gr64, 1, $noreg, 0, $noreg :: (load unordered (s64) from `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: [[NOT64r2:%[0-9]+]]:gr64 = NOT64r [[NOT64r2]] ; CHECK-NEXT: CMP64rr [[NOT64r2]], [[COPY6]], implicit-def $eflags - ; CHECK-NEXT: undef %100.sub_32bit:gr64_with_sub_8bit = MOV32ri 0 - ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], %100, 4, implicit killed $eflags - ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, %100, 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags + ; CHECK-NEXT: undef [[MOV32ri2:%[0-9]+]].sub_32bit:gr64_with_sub_8bit = MOV32ri 0 + ; CHECK-NEXT: [[CMOV64rr:%[0-9]+]]:gr64 = CMOV64rr [[CMOV64rr]], [[MOV32ri2]], 4, implicit killed $eflags + ; CHECK-NEXT: INLINEASM &"lock btsq $0,($1)", 1 /* sideeffect attdialect */, 4521993 /* reguse:GR64 */, [[MOV32ri2]], 4521993 /* reguse:GR64 */, undef %56:gr64, 12 /* clobber */, implicit-def dead early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def dead early-clobber $eflags ; CHECK-NEXT: LCMPXCHG32 undef %67:gr64, 1, $noreg, 0, $noreg, [[COPY5]], implicit-def dead $eax, implicit-def dead $eflags, implicit undef $eax :: (load store acquire monotonic (s32) on `ptr addrspace(1) undef`, addrspace 1) ; CHECK-NEXT: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ; CHECK-NEXT: $rdi = COPY [[COPY4]] diff --git a/llvm/test/CodeGen/X86/statepoint-ra.ll b/llvm/test/CodeGen/X86/statepoint-ra.ll --- a/llvm/test/CodeGen/X86/statepoint-ra.ll +++ b/llvm/test/CodeGen/X86/statepoint-ra.ll @@ -65,53 +65,53 @@ ;CHECK: bb.0.bb: ;CHECK: successors: %bb.2(0x40000000), %bb.1(0x40000000) ;CHECK: liveins: $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7 -;CHECK: %49:fr64 = COPY $xmm7 +;CHECK: %55:fr64 = COPY $xmm7 ;CHECK: %10:fr64 = COPY $xmm6 -;CHECK: %41:fr64 = COPY $xmm5 -;CHECK: %45:fr64 = COPY $xmm4 -;CHECK: %53:fr64 = COPY $xmm3 +;CHECK: %45:fr64 = COPY $xmm5 +;CHECK: %52:fr64 = COPY $xmm4 +;CHECK: %59:fr64 = COPY $xmm3 ;CHECK: %6:fr64 = COPY $xmm2 -;CHECK: %58:fr64 = COPY $xmm1 -;CHECK: %62:fr64 = COPY $xmm0 +;CHECK: %64:fr64 = COPY $xmm1 +;CHECK: %68:fr64 = COPY $xmm0 ;CHECK: %3:gr64 = COPY $rdi -;CHECK: %76:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) +;CHECK: %82:fr64 = MOVSDrm_alt %fixed-stack.0, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.0) ;CHECK: %14:fr64 = MOVSDrm_alt %fixed-stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.1, align 16) -;CHECK: %66:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) -;CHECK: %71:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) +;CHECK: %72:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) +;CHECK: %77:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) ;CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, %3 :: (store (s64) into %stack.0) ;CHECK: ADJCALLSTACKDOWN64 0, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp -;CHECK: STATEPOINT 2882400000, 0, 0, target-flags(x86-plt) @blam, 2, 9, 2, 0, 2, 59, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 26, 2, 0, 2, 0, 1, 8, %stack.0, 0, 2, 4, %62, 2, 7, 2, 0, 2, 4, %58, 2, 7, 2, 0, 2, 4, %6, 2, 7, 2, 0, 2, 4, %53, 2, 7, 2, 0, 2, 4, %45, 2, 7, 2, 0, 2, 4, %41, 2, 7, 2, 0, 2, 4, %10, 2, 7, 2, 0, 2, 4, %49, 2, 7, 2, 0, 2, 4, %71, 2, 7, 2, 0, 2, 4, %66, 2, 7, 2, 0, 2, 4, %14, 2, 7, 2, 0, 2, 4, %76, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64_mostregs, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s64) on %stack.0) +;CHECK: STATEPOINT 2882400000, 0, 0, target-flags(x86-plt) @blam, 2, 9, 2, 0, 2, 59, 2, 0, 2, 1, 2, 0, 2, 0, 2, 0, 2, 26, 2, 0, 2, 0, 1, 8, %stack.0, 0, 2, 4, %68, 2, 7, 2, 0, 2, 4, %64, 2, 7, 2, 0, 2, 4, %6, 2, 7, 2, 0, 2, 4, %59, 2, 7, 2, 0, 2, 4, %52, 2, 7, 2, 0, 2, 4, %45, 2, 7, 2, 0, 2, 4, %10, 2, 7, 2, 0, 2, 4, %55, 2, 7, 2, 0, 2, 4, %77, 2, 7, 2, 0, 2, 4, %72, 2, 7, 2, 0, 2, 4, %14, 2, 7, 2, 0, 2, 4, %82, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 1, 8, %stack.0, 0, 2, 0, 2, 1, 0, 0, csr_64_mostregs, implicit-def $rsp, implicit-def $ssp :: (volatile load store (s64) on %stack.0) ;CHECK: ADJCALLSTACKUP64 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ;CHECK: %17:gr32 = MOV32r0 implicit-def dead $eflags ;CHECK: TEST8rr %17.sub_8bit, %17.sub_8bit, implicit-def $eflags -;CHECK: MOVSDmr %stack.1, 1, $noreg, 0, $noreg, %41 :: (store (s64) into %stack.1) -;CHECK: MOVSDmr %stack.2, 1, $noreg, 0, $noreg, %45 :: (store (s64) into %stack.2) -;CHECK: MOVSDmr %stack.5, 1, $noreg, 0, $noreg, %58 :: (store (s64) into %stack.5) -;CHECK: MOVSDmr %stack.6, 1, $noreg, 0, $noreg, %62 :: (store (s64) into %stack.6) +;CHECK: MOVSDmr %stack.1, 1, $noreg, 0, $noreg, %45 :: (store (s64) into %stack.1) +;CHECK: MOVSDmr %stack.2, 1, $noreg, 0, $noreg, %52 :: (store (s64) into %stack.2) +;CHECK: MOVSDmr %stack.5, 1, $noreg, 0, $noreg, %64 :: (store (s64) into %stack.5) +;CHECK: MOVSDmr %stack.6, 1, $noreg, 0, $noreg, %68 :: (store (s64) into %stack.6) ;CHECK: JCC_1 %bb.2, 4, implicit killed $eflags ;CHECK: bb.1: ;CHECK: successors: %bb.3(0x80000000) -;CHECK: %54:fr64 = MOVSDrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s64) from constant-pool) -;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %54 :: (store (s64) into %stack.3) -;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %54 :: (store (s64) into %stack.4) -;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %54 :: (store (s64) into %stack.7) +;CHECK: %60:fr64 = MOVSDrm_alt $rip, 1, $noreg, %const.0, $noreg :: (load (s64) from constant-pool) +;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %60 :: (store (s64) into %stack.3) +;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %60 :: (store (s64) into %stack.4) +;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %60 :: (store (s64) into %stack.7) ;CHECK: JMP_1 %bb.3 ;CHECK: bb.2.bb13: ;CHECK: successors: %bb.3(0x80000000) ;CHECK: ADJCALLSTACKDOWN64 8, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ;CHECK: MOVSDmr $rsp, 1, $noreg, 0, $noreg, %14 :: (store (s64) into stack) ;CHECK: dead $edi = MOV32r0 implicit-def dead $eflags, implicit-def $rdi -;CHECK: $xmm0 = COPY %62 -;CHECK: $xmm1 = COPY %58 +;CHECK: $xmm0 = COPY %68 +;CHECK: $xmm1 = COPY %64 ;CHECK: $xmm2 = COPY %6 -;CHECK: $xmm3 = COPY %45 -;CHECK: $xmm4 = COPY %41 +;CHECK: $xmm3 = COPY %52 +;CHECK: $xmm4 = COPY %45 ;CHECK: $xmm5 = COPY %10 -;CHECK: $xmm6 = COPY %71 -;CHECK: $xmm7 = COPY %66 -;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %49 :: (store (s64) into %stack.3) -;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %53 :: (store (s64) into %stack.4) -;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %76 :: (store (s64) into %stack.7) +;CHECK: $xmm6 = COPY %77 +;CHECK: $xmm7 = COPY %72 +;CHECK: MOVSDmr %stack.3, 1, $noreg, 0, $noreg, %55 :: (store (s64) into %stack.3) +;CHECK: MOVSDmr %stack.4, 1, $noreg, 0, $noreg, %59 :: (store (s64) into %stack.4) +;CHECK: MOVSDmr %stack.7, 1, $noreg, 0, $noreg, %82 :: (store (s64) into %stack.7) ;CHECK: STATEPOINT 2, 5, 9, undef %22:gr64, $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, 2, 0, 2, 0, 2, 59, 2, 0, 2, 2, 2, 0, 2, 70, 2, 0, 2, 26, 2, 0, 2, 0, 2, 0, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.0, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 1, 2, 0, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def dead $eax :: (load (s64) from %stack.1), (load (s64) from %stack.2), (load (s64) from %stack.3), (load (s64) from %stack.4), (load (s64) from %stack.5), (load (s64) from %stack.6), (load (s64) from %fixed-stack.2), (load (s64) from %fixed-stack.3, align 16), (load (s64) from %fixed-stack.0) ;CHECK: ADJCALLSTACKUP64 8, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ;CHECK: bb.3.bb15: @@ -132,19 +132,19 @@ ;CHECK: bb.5.bb21: ;CHECK: successors: ;CHECK: ADJCALLSTACKDOWN64 8, 0, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp -;CHECK: %79:fr64 = MOVSDrm_alt %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7) -;CHECK: MOVSDmr $rsp, 1, $noreg, 0, $noreg, %79 :: (store (s64) into stack) +;CHECK: %85:fr64 = MOVSDrm_alt %stack.7, 1, $noreg, 0, $noreg :: (load (s64) from %stack.7) +;CHECK: MOVSDmr $rsp, 1, $noreg, 0, $noreg, %85 :: (store (s64) into stack) ;CHECK: $xmm0 = MOVSDrm_alt %stack.6, 1, $noreg, 0, $noreg :: (load (s64) from %stack.6) ;CHECK: $xmm1 = MOVSDrm_alt %stack.5, 1, $noreg, 0, $noreg :: (load (s64) from %stack.5) ;CHECK: $xmm2 = MOVSDrm_alt %stack.4, 1, $noreg, 0, $noreg :: (load (s64) from %stack.4) ;CHECK: $xmm3 = MOVSDrm_alt %stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %stack.2) ;CHECK: $xmm4 = MOVSDrm_alt %stack.1, 1, $noreg, 0, $noreg :: (load (s64) from %stack.1) ;CHECK: $xmm5 = MOVSDrm_alt %stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %stack.3) -;CHECK: %74:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) -;CHECK: $xmm6 = COPY %74 +;CHECK: %80:fr64 = MOVSDrm_alt %fixed-stack.3, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.3, align 16) +;CHECK: $xmm6 = COPY %80 ;CHECK: $esi = MOV32ri 51 -;CHECK: %69:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) -;CHECK: $xmm7 = COPY %69 +;CHECK: %75:fr64 = MOVSDrm_alt %fixed-stack.2, 1, $noreg, 0, $noreg :: (load (s64) from %fixed-stack.2) +;CHECK: $xmm7 = COPY %75 ;CHECK: STATEPOINT 2, 5, 10, undef %36:gr64, undef $rdi, $xmm0, $xmm1, $xmm2, $xmm3, $xmm4, $xmm5, $xmm6, $xmm7, killed $esi, 2, 0, 2, 0, 2, 105, 2, 0, 2, 2, 2, 0, 2, 97, 2, 0, 2, 26, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 7, 2, 0, 2, 2, 2, 2, 2, 46, 2, 0, 2, 20, 2, 0, 2, 0, 2, 4278124286, 2, 4, 1, 8, %stack.6, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.5, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.4, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.1, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.3, 0, 2, 7, 2, 0, 2, 4, 1, 8, %fixed-stack.2, 0, 2, 7, 2, 0, 2, 4, 1, 8, %stack.7, 0, 2, 7, 2, 0, 2, 3, 2, 51, 2, 1, 2, 4278124286, 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp :: (load (s64) from %stack.1), (load (s64) from %stack.2), (load (s64) from %stack.3), (load (s64) from %stack.4), (load (s64) from %stack.5), (load (s64) from %stack.6), (load (s64) from %fixed-stack.2), (load (s64) from %fixed-stack.3, align 16), (load (s64) from %stack.7) ;CHECK: ADJCALLSTACKUP64 8, 0, implicit-def dead $rsp, implicit-def dead $eflags, implicit-def dead $ssp, implicit $rsp, implicit $ssp ;CHECK: bb.6.bb23 (landing-pad): diff --git a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg-details.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg-details.ll @@ -31,7 +31,7 @@ ; CHECK-PREG-LABEL: name: test_relocate ; CHECK-PREG: renamable $rbx = COPY $rdi ; CHECK-PREG: renamable $rbx = STATEPOINT 0, 0, 0, @return_i1, 2, 0, 2, 0, 2, 0, 2, 1, killed renamable $rbx(tied-def 0), 2, 0, 2, 1, 0, 0, csr_64, implicit-def $rsp, implicit-def $ssp, implicit-def $al -; CHECK-PREG: renamable $bpl = COPY killed $al +; CHECK-PREG: renamable $bpl = COPY $al ; CHECK-PREG: $rdi = COPY killed renamable $rbx ; CHECK-PREG: CALL64pcrel32 @consume, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp diff --git a/llvm/test/CodeGen/X86/statepoint-vreg.ll b/llvm/test/CodeGen/X86/statepoint-vreg.ll --- a/llvm/test/CodeGen/X86/statepoint-vreg.ll +++ b/llvm/test/CodeGen/X86/statepoint-vreg.ll @@ -244,31 +244,30 @@ ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: pushq %r14 -; CHECK-NEXT: .cfi_def_cfa_offset 24 ; CHECK-NEXT: pushq %rbx +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: pushq %rax ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: .cfi_offset %rbx, -32 -; CHECK-NEXT: .cfi_offset %r14, -24 +; CHECK-NEXT: .cfi_offset %rbx, -24 ; CHECK-NEXT: .cfi_offset %rbp, -16 -; CHECK-NEXT: movl %esi, %r14d +; CHECK-NEXT: movl %esi, %ebp ; CHECK-NEXT: movq %rdi, %rbx ; CHECK-NEXT: callq return_i1@PLT ; CHECK-NEXT: .Ltmp7: -; CHECK-NEXT: testb $1, %r14b +; CHECK-NEXT: testb $1, %bpl ; CHECK-NEXT: je .LBB7_2 ; CHECK-NEXT: # %bb.1: # %left -; CHECK-NEXT: movl %eax, %ebp ; CHECK-NEXT: movq %rbx, %rdi +; CHECK-NEXT: movl %eax, %ebx ; CHECK-NEXT: callq consume@PLT -; CHECK-NEXT: movl %ebp, %eax +; CHECK-NEXT: movl %ebx, %eax ; CHECK-NEXT: jmp .LBB7_3 ; CHECK-NEXT: .LBB7_2: # %right ; CHECK-NEXT: movb $1, %al ; CHECK-NEXT: .LBB7_3: # %right -; CHECK-NEXT: popq %rbx +; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: .cfi_def_cfa_offset 24 -; CHECK-NEXT: popq %r14 +; CHECK-NEXT: popq %rbx ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: popq %rbp ; CHECK-NEXT: .cfi_def_cfa_offset 8 diff --git a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll --- a/llvm/test/DebugInfo/ARM/sdag-split-arg.ll +++ b/llvm/test/DebugInfo/ARM/sdag-split-arg.ll @@ -19,8 +19,8 @@ ; Function Attrs: optsize ssp define i64 @_Z3foox(i64 returned) local_unnamed_addr #0 !dbg !13 { tail call void @llvm.dbg.value(metadata i64 %0, metadata !17, metadata !DIExpression()), !dbg !18 - ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r5 - ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r4 + ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 0 32] $r0 + ; CHECK: @DEBUG_VALUE: foo:offset <- [DW_OP_LLVM_fragment 32 32] $r1 %2 = load i64, ptr @g, align 8, !dbg !19, !tbaa !21 %3 = icmp eq i64 %2, %0, !dbg !19 diff --git a/llvm/test/DebugInfo/X86/live-debug-values.ll b/llvm/test/DebugInfo/X86/live-debug-values.ll --- a/llvm/test/DebugInfo/X86/live-debug-values.ll +++ b/llvm/test/DebugInfo/X86/live-debug-values.ll @@ -28,6 +28,9 @@ ; This case will also produce multiple locations but only the debug range ; extension is tested here. +; XFAIL: * +; This test is failing after splitting the live range of variable "n" in %bb.3. + ; DBG_VALUE for variable "n" is extended into %bb.5 from its predecessors %bb.3 ; and %bb.4. ; CHECK: .LBB0_5: diff --git a/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll b/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll --- a/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll +++ b/llvm/test/tools/llvm-objdump/XCOFF/disassemble-symbolize-operands.ll @@ -17,13 +17,13 @@ ; CHECK-NEXT: : ; CHECK-NEXT: 60: bf 8, 0x84 ; CHECK-NEXT: : -; CHECK-NEXT: 64: mr 3, 31 -; CHECK-NEXT: 68: bl 0x0 <.internal> -; CHECK-NEXT: 6c: mr 31, 3 -; CHECK-NEXT: 70: cmplwi 3, 11 -; CHECK-NEXT: 74: bt 0, 0x60 -; CHECK-NEXT: 78: bl 0x0 <.internal> -; CHECK-NEXT: 7c: nop +; CHECK-NEXT: 64: bl 0x0 <.internal> +; CHECK-NEXT: 68: cmplwi 3, 11 +; CHECK-NEXT: 6c: bt 0, 0x60 +; CHECK-NEXT: 70: mr 31, 3 +; CHECK-NEXT: 74: bl 0x0 <.internal> +; CHECK-NEXT: 78: nop +; CHECK-NEXT: 7c: mr 3, 31 ; CHECK-NEXT: 80: b 0x60 ; CHECK-NEXT: : ; CHECK-NEXT: 84: lwz 31, 60(1)