diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -902,15 +902,15 @@ BuildMI(MBB, MBBI, dl, TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 : PPC::PROBED_STACKALLOC_32)) - .addDef(ScratchReg) - .addDef(TempReg) // TempReg stores the old sp. + .addDef(TempReg) + .addDef(ScratchReg) // ScratchReg stores the old sp. .addImm(NegFrameSize); // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we // update the ScratchReg to meet the assumption that ScratchReg contains // the NegFrameSize. This solution is rather tricky. if (!HasRedZone) { BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) - .addReg(TempReg) + .addReg(ScratchReg) .addReg(SPReg); HasSTUX = true; } @@ -1230,7 +1230,6 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const { - // TODO: Generate CFI instructions. bool isPPC64 = Subtarget.isPPC64(); const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); @@ -1262,6 +1261,7 @@ bool HasBP = RegInfo->hasBasePointer(MF); Register BPReg = RegInfo->getBaseRegister(MF); Align MaxAlign = MFI.getMaxAlign(); + bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); // Subroutines to generate .cfi_* directives. auto buildDefCFAReg = [&](MachineBasicBlock &MBB, @@ -1315,212 +1315,221 @@ .addReg(SPReg) .addReg(NegSizeReg); }; - // Used to probe realignment gap [stackptr - (stackptr % align), stackptr) - // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30 - // available and r1 is already copied to r30 which is BPReg. So BPReg stores - // the value of stackptr. 
- // First we have to probe tail interval whose size is less than probesize, - // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage, - // ScratchReg stores the value of ((stackptr % align) % probesize). Then we - // probe each block sized probesize until stackptr meets - // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized - // as negprobesize. At both stages, TempReg stores the value of - // (stackptr - (stackptr % align)). - auto dynamicProbe = [&](MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, Register ScratchReg, - Register TempReg) { - assert(HasBP && isPPC64 && "Probe alignment part not available"); + // Used to probe stack when realignment is required. + // Note that, according to ABI's requirement, *sp must always equal the + // value of back-chain pointer, only st(w|d)u(x) can be used to update sp. + // Following is pseudo code: + // final_sp = (sp - sp % align) + negframesize; + // neg_gap = final_sp - sp; + // while (neg_gap < negprobesize) { + // stdu fp, negprobesize(sp); + // neg_gap -= negprobesize; + // } + // stdux fp, sp, neg_gap + // + // When HasBP && HasRedzone, back-chain pointer is already saved in BPReg + // before probe code, we don't need to save it, so we get one additional reg + // that can be used to materialize the probesize if needed to use xform. + // Otherwise, we can NOT materialize probesize, so we can only use Dform for + // now. + // + // The allocations are: + // if (HasBP && HasRedzone) { + // r0: materialize the probesize if needed so that we can use xform. + // r12: `neg_gap` + // } else { + // r0: back-chain pointer + // r12: `neg_gap`. 
+ // } + auto probeRealignedStack = [&](MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register ScratchReg, Register TempReg) { + assert(HasBP && "The function is supposed to have base pointer when its " + "stack is realigned."); assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2"); - // ScratchReg = stackptr % align - BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(BPReg) - .addImm(0) - .addImm(64 - Log2(MaxAlign)); - // TempReg = stackptr - (stackptr % align) - BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg) - .addReg(ScratchReg) - .addReg(BPReg); - // ScratchReg = (stackptr % align) % probesize - BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(ScratchReg) - .addImm(0) - .addImm(64 - Log2(ProbeSize)); + + // FIXME: We can eliminate this limitation if we get more information about + // which parts of the redzone are already used. Used redzone can be treated + // as probed. But there might be `holes' in the probed redzone, which could + // complicate the implementation. + assert(ProbeSize >= Subtarget.getRedZoneSize() && + "Probe size should be larger or equal to the size of red-zone so " + "that red-zone is not clobbered by probing."); + + Register &FinalStackPtr = TempReg; + // FIXME: We only support NegProbeSize materializable by DForm currently. + // When HasBP && HasRedzone, we can use xform if we have an additional idle + // register. + NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15)); + assert(isInt<16>(NegProbeSize) && + "NegProbeSize should be materializable by DForm"); Register CRReg = PPC::CR0; - // If (stackptr % align) % probesize == 0, we should not generate probe - // code. Layout of output assembly kinda like: + // Layout of output assembly kinda like: // bb.0: // ... 
- // cmpldi $scratchreg, 0 - // beq bb.2 - // bb.1: # Probe tail interval - // neg $scratchreg, $scratchreg - // stdux $bpreg, r1, $scratchreg + // sub $scratchreg, $finalsp, r1 + // cmpdi $scratchreg, negprobesize + // bge bb.2 + // bb.1: + // stdu $backchain, negprobesize(r1) + // sub $scratchreg, $scratchreg, negprobesize + // cmpdi $scratchreg, negprobesize + // blt bb.1 // bb.2: - // - // cmpd r1, $tempreg - // beq bb.4 - // bb.3: # Loop to probe each block - // stdux $bpreg, r1, $scratchreg - // cmpd r1, $tempreg - // bne bb.3 - // bb.4: - // ... + // stdux $backchain, r1, $scratchreg MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); - MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeResidualMBB); - MachineBasicBlock *ProbeLoopPreHeaderMBB = - MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB); MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeExitMBB); - // bb.4 - ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); - ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + // bb.2 + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, + BackChainPointer); + if (HasRedZone) + // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg + // to TempReg to satisfy it. 
+ BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg) + .addReg(BPReg) + .addReg(BPReg); + ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); + ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + } // bb.0 - BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0); - BuildMI(&MBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_EQ) - .addReg(CRReg) - .addMBB(ProbeLoopPreHeaderMBB); - MBB.addSuccessor(ProbeResidualMBB); - MBB.addSuccessor(ProbeLoopPreHeaderMBB); + { + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg) + .addReg(SPReg) + .addReg(FinalStackPtr); + if (!HasRedZone) + BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg); + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(&MBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_GE) + .addReg(CRReg) + .addMBB(ProbeExitMBB); + MBB.addSuccessor(ProbeLoopBodyMBB); + MBB.addSuccessor(ProbeExitMBB); + } // bb.1 - BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg) - .addReg(ScratchReg); - allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg, - false, BPReg); - ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB); - // bb.2 - MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(), - NegProbeSize, ScratchReg); - BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg) - .addReg(SPReg) - .addReg(TempReg); - BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_EQ) - .addReg(CRReg) - .addMBB(ProbeExitMBB); - ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB); - ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB); - // bb.3 - allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg, - false, BPReg); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg) - .addReg(SPReg) - .addReg(TempReg); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_NE) - 
.addReg(CRReg) - .addMBB(ProbeLoopBodyMBB); - ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); - ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); + { + Register BackChainPointer = HasRedZone ? BPReg : TempReg; + allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, + 0, true /*UseDForm*/, BackChainPointer); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI), + ScratchReg) + .addReg(ScratchReg) + .addImm(-NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), + CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_LT) + .addReg(CRReg) + .addMBB(ProbeLoopBodyMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); + } // Update liveins. - recomputeLiveIns(*ProbeResidualMBB); - recomputeLiveIns(*ProbeLoopPreHeaderMBB); recomputeLiveIns(*ProbeLoopBodyMBB); recomputeLiveIns(*ProbeExitMBB); return ProbeExitMBB; }; // For case HasBP && MaxAlign > 1, we have to realign the SP by performing - // SP = SP - SP % MaxAlign. + // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since + // the offset subtracted from SP is determined by SP's runtime value. if (HasBP && MaxAlign > 1) { - // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in - // 64-bit mode. - if (isPPC64) { - // Use BPReg to calculate CFA. - if (needsCFI) - buildDefCFA(*CurrentMBB, {MI}, BPReg, 0); - // Since we have SPReg copied to BPReg at the moment, FPReg can be used as - // TempReg. - Register TempReg = FPReg; - CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg); - // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64. - BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) - .addReg(BPReg) - .addReg(BPReg); - } else { - // Initialize current frame pointer. - BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) + // Calculate final stack pointer. 
+ if (isPPC64) + BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) .addReg(SPReg) - .addReg(SPReg); - // Use FPReg to calculate CFA. - if (needsCFI) - buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); + .addImm(0) + .addImm(64 - Log2(MaxAlign)); + else BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) - .addReg(FPReg) + .addReg(SPReg) .addImm(0) .addImm(32 - Log2(MaxAlign)) .addImm(31); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg) - .addReg(ScratchReg) - .addReg(SPReg); - } + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), + FPReg) + .addReg(ScratchReg) + .addReg(SPReg); + MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4), + FPReg) + .addReg(ScratchReg) + .addReg(FPReg); + CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg); + if (needsCFI) + buildDefCFAReg(*CurrentMBB, {MI}, FPReg); } else { // Initialize current frame pointer. BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); // Use FPReg to calculate CFA. if (needsCFI) buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); - } - // Probe residual part. - if (NegResidualSize) { - bool ResidualUseDForm = CanUseDForm(NegResidualSize); - if (!ResidualUseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); - allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, - ResidualUseDForm, FPReg); - } - bool UseDForm = CanUseDForm(NegProbeSize); - // If number of blocks is small, just probe them directly. - if (NumBlocks < 3) { - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - for (int i = 0; i < NumBlocks; ++i) - allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, - FPReg); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*CurrentMBB, {MI}, SPReg); + // Probe residual part. 
+ if (NegResidualSize) { + bool ResidualUseDForm = CanUseDForm(NegResidualSize); + if (!ResidualUseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); + allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, + ResidualUseDForm, FPReg); } - } else { - // Since CTR is a volatile register and current shrinkwrap implementation - // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a - // CTR loop to probe. - // Calculate trip count and stores it in CTRReg. - MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) - .addReg(ScratchReg, RegState::Kill); - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - // Create MBBs of the loop. - MachineFunction::iterator MBBInsertPoint = - std::next(CurrentMBB->getIterator()); - MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, LoopMBB); - MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ExitMBB); - // Synthesize the loop body. - allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, - UseDForm, FPReg); - BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) - .addMBB(LoopMBB); - LoopMBB->addSuccessor(ExitMBB); - LoopMBB->addSuccessor(LoopMBB); - // Synthesize the exit MBB. - ExitMBB->splice(ExitMBB->end(), CurrentMBB, - std::next(MachineBasicBlock::iterator(MI)), - CurrentMBB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); - CurrentMBB->addSuccessor(LoopMBB); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + bool UseDForm = CanUseDForm(NegProbeSize); + // If number of blocks is small, just probe them directly. 
+ if (NumBlocks < 3) { + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + for (int i = 0; i < NumBlocks; ++i) + allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, + FPReg); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*CurrentMBB, {MI}, SPReg); + } + } else { + // Since CTR is a volatile register and current shrinkwrap implementation + // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a + // CTR loop to probe. + // Calculate trip count and stores it in CTRReg. + MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) + .addReg(ScratchReg, RegState::Kill); + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + // Create MBBs of the loop. + MachineFunction::iterator MBBInsertPoint = + std::next(CurrentMBB->getIterator()); + MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, LoopMBB); + MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ExitMBB); + // Synthesize the loop body. + allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, + UseDForm, FPReg); + BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) + .addMBB(LoopMBB); + LoopMBB->addSuccessor(ExitMBB); + LoopMBB->addSuccessor(LoopMBB); + // Synthesize the exit MBB. + ExitMBB->splice(ExitMBB->end(), CurrentMBB, + std::next(MachineBasicBlock::iterator(MI)), + CurrentMBB->end()); + ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); + CurrentMBB->addSuccessor(LoopMBB); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + } + // Update liveins. + recomputeLiveIns(*LoopMBB); + recomputeLiveIns(*ExitMBB); } - // Update liveins. 
- recomputeLiveIns(*LoopMBB); - recomputeLiveIns(*ExitMBB); } ++NumPrologProbed; MI.eraseFromParent(); diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll --- a/llvm/test/CodeGen/PowerPC/pr46759.ll +++ b/llvm/test/CodeGen/PowerPC/pr46759.ll @@ -6,32 +6,26 @@ define void @foo(i32 %vla_size) #0 { ; CHECK-LE-LABEL: foo: ; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: clrldi r12, r1, 53 ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 53 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB0_2 -; CHECK-LE-NEXT: # %bb.1: # %entry -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: .LBB0_2: # %entry -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB0_4 -; CHECK-LE-NEXT: .LBB0_3: # %entry +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: li r12, -6144 +; CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB0_2 +; CHECK-LE-NEXT: .LBB0_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB0_3 -; CHECK-LE-NEXT: .LBB0_4: # %entry -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -2048(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: blt cr0, .LBB0_1 +; CHECK-LE-NEXT: .LBB0_2: # %entry +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r31, -8 ; CHECK-LE-NEXT: .cfi_offset r30, -16 @@ -52,13 +46,13 @@ ; CHECK-LE-NEXT: add r4, 
r1, r4 ; CHECK-LE-NEXT: stdux r3, r1, r5 ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: beq cr0, .LBB0_6 -; CHECK-LE-NEXT: .LBB0_5: # %entry +; CHECK-LE-NEXT: beq cr0, .LBB0_4 +; CHECK-LE-NEXT: .LBB0_3: # %entry ; CHECK-LE-NEXT: # ; CHECK-LE-NEXT: stdu r3, -4096(r1) ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: bne cr0, .LBB0_5 -; CHECK-LE-NEXT: .LBB0_6: # %entry +; CHECK-LE-NEXT: bne cr0, .LBB0_3 +; CHECK-LE-NEXT: .LBB0_4: # %entry ; CHECK-LE-NEXT: addi r3, r1, 2048 ; CHECK-LE-NEXT: lbz r3, 0(r3) ; CHECK-LE-NEXT: mr r1, r30 diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll --- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll @@ -44,12 +44,12 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind { ; CHECK-LE-LABEL: f1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: li r0, 259 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: li r12, 259 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB1_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -16(r1) +; CHECK-LE-NEXT: stdu r0, -16(r1) ; CHECK-LE-NEXT: bdnz .LBB1_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -60,12 +60,12 @@ ; ; CHECK-BE-LABEL: f1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: li r0, 260 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: li r12, 260 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB1_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -16(r1) +; CHECK-BE-NEXT: stdu r0, -16(r1) ; CHECK-BE-NEXT: bdnz .LBB1_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -76,16 +76,16 @@ ; ; CHECK-32-LABEL: f1: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: li r0, 257 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: li r12, 257 +; 
CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB1_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -16(r1) +; CHECK-32-NEXT: stwu r0, -16(r1) ; CHECK-32-NEXT: bdnz .LBB1_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: li r3, 3 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: lbz r3, 16(r1) @@ -102,13 +102,13 @@ define i8 @f2() #0 nounwind { ; CHECK-LE-LABEL: f2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB2_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB2_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -119,13 +119,13 @@ ; ; CHECK-BE-LABEL: f2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB2_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB2_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -136,16 +136,16 @@ ; ; CHECK-32-LABEL: f2: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB2_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB2_1 ; CHECK-32-NEXT: # 
%bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -166,10 +166,10 @@ define i8 @f3() #0 "stack-probe-size"="32768" nounwind { ; CHECK-LE-LABEL: f3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -178,10 +178,10 @@ ; ; CHECK-BE-LABEL: f3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -190,11 +190,11 @@ ; ; CHECK-32-LABEL: f3: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -261,15 +261,15 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind { ; CHECK-LE-LABEL: f5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, 
-48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB5_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB5_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -280,15 +280,15 @@ ; ; CHECK-BE-LABEL: f5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB5_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB5_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -299,18 +299,18 @@ ; ; CHECK-32-LABEL: f5: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: nop +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB5_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB5_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -331,14 +331,14 @@ define i8 @f6() #0 nounwind { ; CHECK-LE-LABEL: f6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: lis r0, 4 -; CHECK-LE-NEXT: nop -; 
CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: lis r12, 4 +; CHECK-LE-NEXT: ori r12, r12, 0 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB6_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB6_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -349,14 +349,14 @@ ; ; CHECK-BE-LABEL: f6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: lis r0, 4 -; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: lis r12, 4 +; CHECK-BE-NEXT: ori r12, r12, 0 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB6_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB6_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -367,17 +367,17 @@ ; ; CHECK-32-LABEL: f6: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: lis r0, 4 -; CHECK-32-NEXT: nop -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: lis r12, 4 +; CHECK-32-NEXT: ori r12, r12, 0 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB6_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB6_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 16(r1) @@ -398,17 +398,17 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind { ; CHECK-LE-LABEL: f7: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: ori r0, r0, 13776 -; CHECK-LE-NEXT: stdux r12, r1, r0 -; CHECK-LE-NEXT: li r0, 
15258 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: ori r12, r12, 13776 +; CHECK-LE-NEXT: stdux r0, r1, r12 +; CHECK-LE-NEXT: li r12, 15258 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB7_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB7_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: li r3, 3 @@ -419,17 +419,17 @@ ; ; CHECK-BE-LABEL: f7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: ori r0, r0, 13760 -; CHECK-BE-NEXT: stdux r12, r1, r0 -; CHECK-BE-NEXT: li r0, 15258 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: ori r12, r12, 13760 +; CHECK-BE-NEXT: stdux r0, r1, r12 +; CHECK-BE-NEXT: li r12, 15258 +; CHECK-BE-NEXT: mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB7_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB7_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: li r3, 3 @@ -440,20 +440,20 @@ ; ; CHECK-32-LABEL: f7: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: ori r0, r0, 13808 -; CHECK-32-NEXT: stwux r12, r1, r0 -; CHECK-32-NEXT: li r0, 15258 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: nop +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: ori r12, r12, 13808 +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: li r12, 15258 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB7_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwux r12, 
r1, r0 +; CHECK-32-NEXT: stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB7_1 ; CHECK-32-NEXT: # %bb.2: # %entry -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: stb r3, 9(r1) diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll --- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll @@ -44,13 +44,13 @@ define i8 @f1() #0 "stack-probe-size"="0" { ; CHECK-LE-LABEL: f1: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: li r0, 259 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: li r12, 259 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB1_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -16(r1) +; CHECK-LE-NEXT: stdu r0, -16(r1) ; CHECK-LE-NEXT: bdnz .LBB1_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -63,13 +63,13 @@ ; ; CHECK-BE-LABEL: f1: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: li r0, 260 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: li r12, 260 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB1_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -16(r1) +; CHECK-BE-NEXT: stdu r0, -16(r1) ; CHECK-BE-NEXT: bdnz .LBB1_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -82,17 +82,17 @@ ; ; CHECK-32-LABEL: f1: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: li r0, 257 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: li r12, 257 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB1_1: # %entry ; 
CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -16(r1) +; CHECK-32-NEXT: stwu r0, -16(r1) ; CHECK-32-NEXT: bdnz .LBB1_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 4112 ; CHECK-32-NEXT: li r3, 3 @@ -111,14 +111,14 @@ define i8 @f2() #0 { ; CHECK-LE-LABEL: f2: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB2_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB2_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -131,14 +131,14 @@ ; ; CHECK-BE-LABEL: f2: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB2_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB2_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -151,18 +151,18 @@ ; ; CHECK-32-LABEL: f2: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 
+; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB2_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB2_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 65552 ; CHECK-32-NEXT: li r3, 3 @@ -184,11 +184,11 @@ define i8 @f3() #0 "stack-probe-size"="32768" { ; CHECK-LE-LABEL: f3: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) -; CHECK-LE-NEXT: stdu r12, -32768(r1) +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) +; CHECK-LE-NEXT: stdu r0, -32768(r1) ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 ; CHECK-LE-NEXT: .cfi_def_cfa_offset 65584 ; CHECK-LE-NEXT: li r3, 3 @@ -199,11 +199,11 @@ ; ; CHECK-BE-LABEL: f3: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) -; CHECK-BE-NEXT: stdu r12, -32768(r1) +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) +; CHECK-BE-NEXT: stdu r0, -32768(r1) ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 ; CHECK-BE-NEXT: .cfi_def_cfa_offset 65600 ; CHECK-BE-NEXT: li r3, 3 @@ -214,13 +214,13 @@ ; ; CHECK-32-LABEL: f3: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) -; CHECK-32-NEXT: stwu r12, -32768(r1) +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) +; CHECK-32-NEXT: stwu r0, -32768(r1) 
; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 65552 ; CHECK-32-NEXT: li r3, 3 @@ -291,16 +291,16 @@ define i8 @f5() #0 "stack-probe-size"="65536" { ; CHECK-LE-LABEL: f5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: li r0, 16 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: li r12, 16 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB5_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB5_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -313,16 +313,16 @@ ; ; CHECK-BE-LABEL: f5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: li r0, 16 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: li r12, 16 +; CHECK-BE-NEXT: mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB5_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB5_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -335,20 +335,20 @@ ; ; CHECK-32-LABEL: f5: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: li r0, 16 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; 
CHECK-32-NEXT: nop +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: li r12, 16 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB5_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB5_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1048592 ; CHECK-32-NEXT: li r3, 3 @@ -370,15 +370,15 @@ define i8 @f6() #0 { ; CHECK-LE-LABEL: f6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) -; CHECK-LE-NEXT: lis r0, 4 -; CHECK-LE-NEXT: nop -; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: stdu r0, -48(r1) +; CHECK-LE-NEXT: lis r12, 4 +; CHECK-LE-NEXT: ori r12, r12, 0 +; CHECK-LE-NEXT: mtctr r12 ; CHECK-LE-NEXT: .LBB6_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r0, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB6_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -391,15 +391,15 @@ ; ; CHECK-BE-LABEL: f6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) -; CHECK-BE-NEXT: lis r0, 4 -; CHECK-BE-NEXT: nop -; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: stdu r0, -64(r1) +; CHECK-BE-NEXT: lis r12, 4 +; CHECK-BE-NEXT: ori r12, r12, 0 +; CHECK-BE-NEXT: mtctr r12 ; CHECK-BE-NEXT: .LBB6_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r0, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB6_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; 
CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -412,19 +412,19 @@ ; ; CHECK-32-LABEL: f6: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) -; CHECK-32-NEXT: lis r0, 4 -; CHECK-32-NEXT: nop -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: stwu r0, -16(r1) +; CHECK-32-NEXT: lis r12, 4 +; CHECK-32-NEXT: ori r12, r12, 0 +; CHECK-32-NEXT: mtctr r12 ; CHECK-32-NEXT: .LBB6_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB6_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1073741840 ; CHECK-32-NEXT: li r3, 3 @@ -446,18 +446,18 @@ define i8 @f7() #0 "stack-probe-size"="65536" { ; CHECK-LE-LABEL: f7: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: ori r0, r0, 13776 -; CHECK-LE-NEXT: stdux r12, r1, r0 -; CHECK-LE-NEXT: li r0, 15258 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: nop +; CHECK-LE-NEXT: mr r0, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 13776 +; CHECK-LE-NEXT: stdux r0, r1, r12 +; CHECK-LE-NEXT: li r12, 15258 +; CHECK-LE-NEXT: mtctr r12 +; CHECK-LE-NEXT: lis r12, -1 +; CHECK-LE-NEXT: ori r12, r12, 0 ; CHECK-LE-NEXT: .LBB7_1: # %entry ; CHECK-LE-NEXT: # -; CHECK-LE-NEXT: stdux r12, r1, r0 +; CHECK-LE-NEXT: stdux r0, r1, r12 ; CHECK-LE-NEXT: bdnz .LBB7_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -470,18 +470,18 @@ ; ; CHECK-BE-LABEL: f7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: lis r0, -1 -; 
CHECK-BE-NEXT: ori r0, r0, 13760 -; CHECK-BE-NEXT: stdux r12, r1, r0 -; CHECK-BE-NEXT: li r0, 15258 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: nop +; CHECK-BE-NEXT: mr r0, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r0, 0 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 13760 +; CHECK-BE-NEXT: stdux r0, r1, r12 +; CHECK-BE-NEXT: li r12, 15258 +; CHECK-BE-NEXT: mtctr r12 +; CHECK-BE-NEXT: lis r12, -1 +; CHECK-BE-NEXT: ori r12, r12, 0 ; CHECK-BE-NEXT: .LBB7_1: # %entry ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: stdux r12, r1, r0 +; CHECK-BE-NEXT: stdux r0, r1, r12 ; CHECK-BE-NEXT: bdnz .LBB7_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 @@ -494,22 +494,22 @@ ; ; CHECK-32-LABEL: f7: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: ori r0, r0, 13808 -; CHECK-32-NEXT: stwux r12, r1, r0 -; CHECK-32-NEXT: li r0, 15258 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: nop +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: .cfi_def_cfa r0, 0 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 13808 +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: li r12, 15258 +; CHECK-32-NEXT: mtctr r12 +; CHECK-32-NEXT: lis r12, -1 +; CHECK-32-NEXT: ori r12, r12, 0 ; CHECK-32-NEXT: .LBB7_1: # %entry ; CHECK-32-NEXT: # -; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: stwux r0, r1, r12 ; CHECK-32-NEXT: bdnz .LBB7_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 -; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1000000016 ; CHECK-32-NEXT: li r3, 3 @@ -599,31 +599,24 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 { ; CHECK-LE-LABEL: f9: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r12, r1, 53 ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa 
r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 53 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB9_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: li r12, -10240 +; CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB9_2 +; CHECK-LE-NEXT: .LBB9_1: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: blt cr0, .LBB9_1 ; CHECK-LE-NEXT: .LBB9_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB9_4 -; CHECK-LE-NEXT: .LBB9_3: -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB9_3 -; CHECK-LE-NEXT: .LBB9_4: -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -2048(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r30, -16 ; CHECK-LE-NEXT: addi r4, r1, 2048 @@ -637,31 +630,24 @@ ; ; CHECK-BE-LABEL: f9: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r12, r1, 53 ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 53 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB9_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: sub r0, r1, r12 +; CHECK-BE-NEXT: li r12, -10240 +; CHECK-BE-NEXT: add r0, r12, r0 +; CHECK-BE-NEXT: sub r12, r0, r1 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: 
bge cr0, .LBB9_2 +; CHECK-BE-NEXT: .LBB9_1: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: addi r12, r12, 4096 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: blt cr0, .LBB9_1 ; CHECK-BE-NEXT: .LBB9_2: -; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB9_4 -; CHECK-BE-NEXT: .LBB9_3: -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: bne cr0, .LBB9_3 -; CHECK-BE-NEXT: .LBB9_4: -; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: stdu r12, -2048(r1) -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdux r30, r1, r12 +; CHECK-BE-NEXT: mr r0, r30 +; CHECK-BE-NEXT: .cfi_def_cfa_register r0 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r30, -16 ; CHECK-BE-NEXT: addi r4, r1, 2048 @@ -675,15 +661,23 @@ ; ; CHECK-32-LABEL: f9: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: clrlwi r0, r12, 21 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: stwu r12, -2048(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: clrlwi r12, r1, 21 ; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: li r12, -10240 +; CHECK-32-NEXT: add r0, r12, r0 +; CHECK-32-NEXT: sub r12, r0, r1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: bge cr0, .LBB9_2 +; CHECK-32-NEXT: .LBB9_1: +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: addi r12, r12, 4096 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: blt cr0, .LBB9_1 +; CHECK-32-NEXT: .LBB9_2: +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: .cfi_def_cfa_register r0 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -8 ; CHECK-32-NEXT: stwx r30, 0, r0 @@ -712,30 +706,24 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 { ; 
CHECK-LE-LABEL: f10: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r12, r1, 54 ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 54 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB10_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: li r12, -5120 +; CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB10_2 +; CHECK-LE-NEXT: .LBB10_1: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: blt cr0, .LBB10_1 ; CHECK-LE-NEXT: .LBB10_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB10_4 -; CHECK-LE-NEXT: .LBB10_3: -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB10_3 -; CHECK-LE-NEXT: .LBB10_4: -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -1024(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r30, -16 ; CHECK-LE-NEXT: addi r4, r1, 1024 @@ -749,30 +737,24 @@ ; ; CHECK-BE-LABEL: f10: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r12, r1, 54 ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 54 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB10_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: sub r0, r1, r12 +; 
CHECK-BE-NEXT: li r12, -5120 +; CHECK-BE-NEXT: add r0, r12, r0 +; CHECK-BE-NEXT: sub r12, r0, r1 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: bge cr0, .LBB10_2 +; CHECK-BE-NEXT: .LBB10_1: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: addi r12, r12, 4096 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: blt cr0, .LBB10_1 ; CHECK-BE-NEXT: .LBB10_2: -; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB10_4 -; CHECK-BE-NEXT: .LBB10_3: -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: bne cr0, .LBB10_3 -; CHECK-BE-NEXT: .LBB10_4: -; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: stdu r12, -1024(r1) -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdux r30, r1, r12 +; CHECK-BE-NEXT: mr r0, r30 +; CHECK-BE-NEXT: .cfi_def_cfa_register r0 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r30, -16 ; CHECK-BE-NEXT: addi r4, r1, 1024 @@ -786,14 +768,23 @@ ; ; CHECK-32-LABEL: f10: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: clrlwi r0, r12, 22 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: stwu r12, -1024(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: clrlwi r12, r1, 22 ; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: li r12, -5120 +; CHECK-32-NEXT: add r0, r12, r0 +; CHECK-32-NEXT: sub r12, r0, r1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: bge cr0, .LBB10_2 +; CHECK-32-NEXT: .LBB10_1: +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: addi r12, r12, 4096 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: blt cr0, .LBB10_1 +; CHECK-32-NEXT: .LBB10_2: +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: .cfi_def_cfa_register r0 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -8 ; CHECK-32-NEXT: stwx 
r30, 0, r0 @@ -821,35 +812,26 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-LE-LABEL: f11: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r12, r1, 49 ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 49 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB11_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: sub r0, r1, r12 +; CHECK-LE-NEXT: lis r12, -2 +; CHECK-LE-NEXT: ori r12, r12, 32768 +; CHECK-LE-NEXT: add r0, r12, r0 +; CHECK-LE-NEXT: sub r12, r0, r1 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB11_2 +; CHECK-LE-NEXT: .LBB11_1: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: addi r12, r12, 4096 +; CHECK-LE-NEXT: cmpdi r12, -4096 +; CHECK-LE-NEXT: blt cr0, .LBB11_1 ; CHECK-LE-NEXT: .LBB11_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB11_4 -; CHECK-LE-NEXT: .LBB11_3: -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB11_3 -; CHECK-LE-NEXT: .LBB11_4: -; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: li r0, 24 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: .LBB11_5: -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: bdnz .LBB11_5 -; CHECK-LE-NEXT: # %bb.6: -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdux r30, r1, r12 +; CHECK-LE-NEXT: mr r0, r30 +; CHECK-LE-NEXT: .cfi_def_cfa_register r0 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r31, -8 ; CHECK-LE-NEXT: .cfi_offset r30, -16 @@ -876,12 +858,12 @@ ; CHECK-LE-NEXT: add r4, r1, r7 ; CHECK-LE-NEXT: stdux r3, r1, r5 ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: beq cr0, .LBB11_8 -; CHECK-LE-NEXT: .LBB11_7: +; CHECK-LE-NEXT: beq cr0, .LBB11_4 +; CHECK-LE-NEXT: .LBB11_3: ; 
CHECK-LE-NEXT: stdu r3, -4096(r1) ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: bne cr0, .LBB11_7 -; CHECK-LE-NEXT: .LBB11_8: +; CHECK-LE-NEXT: bne cr0, .LBB11_3 +; CHECK-LE-NEXT: .LBB11_4: ; CHECK-LE-NEXT: addi r3, r1, -32768 ; CHECK-LE-NEXT: lbz r3, 0(r3) ; CHECK-LE-NEXT: mr r1, r30 @@ -891,35 +873,26 @@ ; ; CHECK-BE-LABEL: f11: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r12, r1, 49 ; CHECK-BE-NEXT: std r31, -8(r1) ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 49 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB11_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: sub r0, r1, r12 +; CHECK-BE-NEXT: lis r12, -2 +; CHECK-BE-NEXT: ori r12, r12, 32768 +; CHECK-BE-NEXT: add r0, r12, r0 +; CHECK-BE-NEXT: sub r12, r0, r1 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: bge cr0, .LBB11_2 +; CHECK-BE-NEXT: .LBB11_1: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: addi r12, r12, 4096 +; CHECK-BE-NEXT: cmpdi r12, -4096 +; CHECK-BE-NEXT: blt cr0, .LBB11_1 ; CHECK-BE-NEXT: .LBB11_2: -; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB11_4 -; CHECK-BE-NEXT: .LBB11_3: -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: bne cr0, .LBB11_3 -; CHECK-BE-NEXT: .LBB11_4: -; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: li r0, 24 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: .LBB11_5: -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: bdnz .LBB11_5 -; CHECK-BE-NEXT: # %bb.6: -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdux r30, r1, r12 +; CHECK-BE-NEXT: mr r0, r30 +; CHECK-BE-NEXT: .cfi_def_cfa_register r0 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r31, -8 ; CHECK-BE-NEXT: .cfi_offset r30, -16 @@ -946,12 +919,12 @@ ; 
CHECK-BE-NEXT: add r4, r1, r7 ; CHECK-BE-NEXT: stdux r3, r1, r5 ; CHECK-BE-NEXT: cmpd r1, r4 -; CHECK-BE-NEXT: beq cr0, .LBB11_8 -; CHECK-BE-NEXT: .LBB11_7: +; CHECK-BE-NEXT: beq cr0, .LBB11_4 +; CHECK-BE-NEXT: .LBB11_3: ; CHECK-BE-NEXT: stdu r3, -4096(r1) ; CHECK-BE-NEXT: cmpd r1, r4 -; CHECK-BE-NEXT: bne cr0, .LBB11_7 -; CHECK-BE-NEXT: .LBB11_8: +; CHECK-BE-NEXT: bne cr0, .LBB11_3 +; CHECK-BE-NEXT: .LBB11_4: ; CHECK-BE-NEXT: addi r3, r1, -32768 ; CHECK-BE-NEXT: lbz r3, 0(r3) ; CHECK-BE-NEXT: mr r1, r30 @@ -961,18 +934,24 @@ ; ; CHECK-32-LABEL: f11: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: clrlwi r0, r12, 17 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: li r0, 24 -; CHECK-32-NEXT: mtctr r0 -; CHECK-32-NEXT: .LBB11_1: -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: bdnz .LBB11_1 -; CHECK-32-NEXT: # %bb.2: -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: clrlwi r12, r1, 17 ; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: lis r12, -2 +; CHECK-32-NEXT: ori r12, r12, 32768 +; CHECK-32-NEXT: add r0, r12, r0 +; CHECK-32-NEXT: sub r12, r0, r1 +; CHECK-32-NEXT: mr r0, r1 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: bge cr0, .LBB11_2 +; CHECK-32-NEXT: .LBB11_1: +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: addi r12, r12, 4096 +; CHECK-32-NEXT: cmpwi r12, -4096 +; CHECK-32-NEXT: blt cr0, .LBB11_1 +; CHECK-32-NEXT: .LBB11_2: +; CHECK-32-NEXT: stwux r0, r1, r12 +; CHECK-32-NEXT: .cfi_def_cfa_register r0 +; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -4 ; CHECK-32-NEXT: stwx r31, 0, r0