diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1187,7 +1187,6 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const { - // TODO: Generate CFI instructions. bool isPPC64 = Subtarget.isPPC64(); const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); @@ -1272,212 +1271,209 @@ .addReg(SPReg) .addReg(NegSizeReg); }; - // Used to probe realignment gap [stackptr - (stackptr % align), stackptr) - // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30 - // available and r1 is already copied to r30 which is BPReg. So BPReg stores - // the value of stackptr. - // First we have to probe tail interval whose size is less than probesize, - // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage, - // ScratchReg stores the value of ((stackptr % align) % probesize). Then we - // probe each block sized probesize until stackptr meets - // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized - // as negprobesize. At both stages, TempReg stores the value of - // (stackptr - (stackptr % align)). + // Used to probe stack that realignment is required. + // TempReg(r12) is supposed to store the value of final stackptr, which is + // immutable during probe. + // ScratchReg(r0) is used to store intermediate values. + // When HasBP && isPPC64, we have r0, r1, r12, r30 available and r1 is already + // copied to r30 which is BPReg. So BPReg stores the value of back-chain + // pointer. Otherwise, we only have r0, r1, r12 available and at such + // situation, back-chain pointer is loaded from 0(r1) at every probe. 
auto dynamicProbe = [&](MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register ScratchReg, Register TempReg) { - assert(HasBP && isPPC64 && "Probe alignment part not available"); assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2"); - // ScratchReg = stackptr % align - BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(BPReg) - .addImm(0) - .addImm(64 - Log2(MaxAlign)); - // TempReg = stackptr - (stackptr % align) - BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg) - .addReg(ScratchReg) - .addReg(BPReg); - // ScratchReg = (stackptr % align) % probesize - BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(ScratchReg) - .addImm(0) - .addImm(64 - Log2(ProbeSize)); + assert(ProbeSize >= Subtarget.getRedZoneSize() && + "Probe size should be larger or equal to the size of red-zone so " + "that red-zone is not clobbered by probing"); + bool HasBackChainPointerInBPReg = HasBP && isPPC64; + Register &FinalStackPtr = TempReg; + // FIXME: We can only support NegProbeSize materialized by DForm currently. + NegProbeSize = std::max<int64_t>(NegProbeSize, -(int64_t(1) << 15)); + assert(isInt<16>(NegProbeSize) && + "NegProbeSize should be materialized by DForm"); + auto getBackChainPointer = [&](MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + Register Result) { + if (HasBackChainPointerInBPReg) + return BPReg; + BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LD : PPC::LWZ), Result) + .addImm(0) + .addReg(SPReg); + return Result; + }; Register CRReg = PPC::CR0; - // If (stackptr % align) % probesize == 0, we should not generate probe - // code. Layout of output assembly kinda like: + // Layout of output assembly kinda like: // bb.0: // ... 
- // cmpldi $scratchreg, 0 - // beq bb.2 - // bb.1: # Probe tail interval - // neg $scratchreg, $scratchreg - // stdux $bpreg, r1, $scratchreg + // sub $scratchreg, $finalsp, r1 + // cmpdi $scratchreg, + // bge bb.2 + // bb.1: + // stdux , (r1) + // sub $scratchreg, $finalsp, r1 + // cmpdi $scratchreg, + // blt bb.1 // bb.2: - // - // cmpd r1, $tempreg - // beq bb.4 - // bb.3: # Loop to probe each block - // stdux $bpreg, r1, $scratchreg - // cmpd r1, $tempreg - // bne bb.3 - // bb.4: - // ... + // stdux , r1, $scratchreg MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); - MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeResidualMBB); - MachineBasicBlock *ProbeLoopPreHeaderMBB = - MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB); MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeExitMBB); - // bb.4 + // bb.2 + { + Register BackChainPointer = + getBackChainPointer(*ProbeExitMBB, ProbeExitMBB->end(), TempReg); + allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false, + BackChainPointer); + if (HasBackChainPointerInBPReg) + // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64. 
+ BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg) + .addReg(BPReg) + .addReg(BPReg); + } ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); // bb.0 - BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0); - BuildMI(&MBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_EQ) - .addReg(CRReg) - .addMBB(ProbeLoopPreHeaderMBB); - MBB.addSuccessor(ProbeResidualMBB); - MBB.addSuccessor(ProbeLoopPreHeaderMBB); - // bb.1 - BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg) - .addReg(ScratchReg); - allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg, - false, BPReg); - ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB); - // bb.2 - MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(), - NegProbeSize, ScratchReg); - BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg) + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg) .addReg(SPReg) - .addReg(TempReg); - BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_EQ) + .addReg(FinalStackPtr); + BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); + BuildMI(&MBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_GE) .addReg(CRReg) .addMBB(ProbeExitMBB); - ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB); - ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB); - // bb.3 - allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg, - false, BPReg); - BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg) + MBB.addSuccessor(ProbeLoopBodyMBB); + MBB.addSuccessor(ProbeExitMBB); + // bb.1 + { + Register BackChainPointer = getBackChainPointer( + *ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), ScratchReg); + allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize, + 0, true, BackChainPointer); + } + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? 
PPC::SUBF8 : PPC::SUBF), + ScratchReg) .addReg(SPReg) - .addReg(TempReg); + .addReg(FinalStackPtr); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), + CRReg) + .addReg(ScratchReg) + .addImm(NegProbeSize); BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_NE) + .addImm(PPC::PRED_LT) .addReg(CRReg) .addMBB(ProbeLoopBodyMBB); ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); // Update liveins. - recomputeLiveIns(*ProbeResidualMBB); - recomputeLiveIns(*ProbeLoopPreHeaderMBB); recomputeLiveIns(*ProbeLoopBodyMBB); recomputeLiveIns(*ProbeExitMBB); return ProbeExitMBB; }; // For case HasBP && MaxAlign > 1, we have to realign the SP by performing - // SP = SP - SP % MaxAlign. + // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since + // the offset subtracted from SP is determined by SP's runtime value. if (HasBP && MaxAlign > 1) { - // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in - // 64-bit mode. - if (isPPC64) { - // Use BPReg to calculate CFA. - if (needsCFI) - buildDefCFA(*CurrentMBB, {MI}, BPReg, 0); - // Since we have SPReg copied to BPReg at the moment, FPReg can be used as - // TempReg. - Register TempReg = FPReg; - CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg); - // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64. - BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) - .addReg(BPReg) - .addReg(BPReg); - } else { - // Initialize current frame pointer. - BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) + // Calculate final stack pointer. + if (isPPC64) + BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) .addReg(SPReg) - .addReg(SPReg); - // Use FPReg to calculate CFA. 
- if (needsCFI) - buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); + .addImm(0) + .addImm(64 - Log2(MaxAlign)); + else BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) - .addReg(FPReg) + .addReg(SPReg) .addImm(0) .addImm(32 - Log2(MaxAlign)) .addImm(31); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg) - .addReg(ScratchReg) - .addReg(SPReg); - } + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), + FPReg) + .addReg(ScratchReg) + .addReg(SPReg); + MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4), + FPReg) + .addReg(ScratchReg) + .addReg(FPReg); + CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, FPReg); + if (needsCFI) + buildDefCFAReg(*CurrentMBB, {MI}, FPReg); } else { // Initialize current frame pointer. BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); // Use FPReg to calculate CFA. if (needsCFI) buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); - } - // Probe residual part. - if (NegResidualSize) { - bool ResidualUseDForm = CanUseDForm(NegResidualSize); - if (!ResidualUseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); - allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, - ResidualUseDForm, FPReg); - } - bool UseDForm = CanUseDForm(NegProbeSize); - // If number of blocks is small, just probe them directly. - if (NumBlocks < 3) { - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - for (int i = 0; i < NumBlocks; ++i) - allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, - FPReg); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*CurrentMBB, {MI}, SPReg); - } - } else { - // Since CTR is a volatile register and current shrinkwrap implementation - // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a - // CTR loop to probe. - // Calculate trip count and stores it in CTRReg. 
- MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); - BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) - .addReg(ScratchReg, RegState::Kill); - if (!UseDForm) - MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); - // Create MBBs of the loop. - MachineFunction::iterator MBBInsertPoint = - std::next(CurrentMBB->getIterator()); - MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, LoopMBB); - MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); - MF.insert(MBBInsertPoint, ExitMBB); - // Synthesize the loop body. - allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, - UseDForm, FPReg); - BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) - .addMBB(LoopMBB); - LoopMBB->addSuccessor(ExitMBB); - LoopMBB->addSuccessor(LoopMBB); - // Synthesize the exit MBB. - ExitMBB->splice(ExitMBB->end(), CurrentMBB, - std::next(MachineBasicBlock::iterator(MI)), - CurrentMBB->end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); - CurrentMBB->addSuccessor(LoopMBB); - if (needsCFI) { - // Restore using SPReg to calculate CFA. - buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + bool UseDForm = CanUseDForm(NegProbeSize); + // If number of blocks is small, just probe them directly. + if (NumBlocks < 3) { + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + for (int i = 0; i < NumBlocks; ++i) + allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, + FPReg); + // Probe residual part. + if (NegResidualSize) { + bool ResidualUseDForm = CanUseDForm(NegResidualSize); + if (!ResidualUseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); + allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, + ResidualUseDForm, FPReg); + } + if (needsCFI) { + // Restore using SPReg to calculate CFA. 
+ buildDefCFAReg(*CurrentMBB, {MI}, SPReg); + } + } else { + // Since CTR is a volatile register and current shrinkwrap implementation + // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize + // a CTR loop to probe. Calculate trip count and stores it in CTRReg. + MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, + TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) + .addReg(ScratchReg, RegState::Kill); + if (!UseDForm) + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); + // Create MBBs of the loop. + MachineFunction::iterator MBBInsertPoint = + std::next(CurrentMBB->getIterator()); + MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, LoopMBB); + MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ExitMBB); + // Synthesize the loop body. + allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, + UseDForm, FPReg); + BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) + .addMBB(LoopMBB); + LoopMBB->addSuccessor(ExitMBB); + LoopMBB->addSuccessor(LoopMBB); + // Synthesize the exit MBB. + // Probe residual part. + if (NegResidualSize) { + bool ResidualUseDForm = CanUseDForm(NegResidualSize); + if (!ResidualUseDForm) + MaterializeImm(*ExitMBB, ExitMBB->end(), NegResidualSize, ScratchReg); + allocateAndProbe(*ExitMBB, ExitMBB->end(), NegResidualSize, ScratchReg, + ResidualUseDForm, FPReg); + } + ExitMBB->splice(ExitMBB->end(), CurrentMBB, + std::next(MachineBasicBlock::iterator(MI)), + CurrentMBB->end()); + ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); + CurrentMBB->addSuccessor(LoopMBB); + if (needsCFI) { + // Restore using SPReg to calculate CFA. + buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); + } + // Update liveins. + recomputeLiveIns(*LoopMBB); + recomputeLiveIns(*ExitMBB); } - // Update liveins. 
- recomputeLiveIns(*LoopMBB); - recomputeLiveIns(*ExitMBB); } ++NumPrologProbed; MI.eraseFromParent(); diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll --- a/llvm/test/CodeGen/PowerPC/pr46759.ll +++ b/llvm/test/CodeGen/PowerPC/pr46759.ll @@ -6,32 +6,25 @@ define void @foo(i32 %vla_size) #0 { ; CHECK-LE-LABEL: foo: ; CHECK-LE: # %bb.0: # %entry +; CHECK-LE-NEXT: clrldi r0, r1, 53 ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 53 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB0_2 -; CHECK-LE-NEXT: # %bb.1: # %entry -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: .LBB0_2: # %entry -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB0_4 -; CHECK-LE-NEXT: .LBB0_3: # %entry +; CHECK-LE-NEXT: sub r12, r1, r0 +; CHECK-LE-NEXT: addi r12, r12, -6144 +; CHECK-LE-NEXT: .LBB0_1: # %entry ; CHECK-LE-NEXT: # +; CHECK-LE-NEXT: sub r0, r12, r1 +; CHECK-LE-NEXT: cmpdi r0, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB0_3 +; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: # +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: b .LBB0_1 +; CHECK-LE-NEXT: .LBB0_3: # %entry ; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB0_3 -; CHECK-LE-NEXT: .LBB0_4: # %entry ; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -2048(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_register r12 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r31, -8 ; CHECK-LE-NEXT: .cfi_offset r30, -16 @@ -52,13 +45,13 @@ ; CHECK-LE-NEXT: add r4, r1, r4 ; CHECK-LE-NEXT: stdux r3, r1, r5 ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: beq cr0, .LBB0_6 -; 
CHECK-LE-NEXT: .LBB0_5: # %entry +; CHECK-LE-NEXT: beq cr0, .LBB0_5 +; CHECK-LE-NEXT: .LBB0_4: # %entry ; CHECK-LE-NEXT: # ; CHECK-LE-NEXT: stdu r3, -4096(r1) ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: bne cr0, .LBB0_5 -; CHECK-LE-NEXT: .LBB0_6: # %entry +; CHECK-LE-NEXT: bne cr0, .LBB0_4 +; CHECK-LE-NEXT: .LBB0_5: # %entry ; CHECK-LE-NEXT: addi r3, r1, 2048 ; CHECK-LE-NEXT: lbz r3, 0(r3) ; CHECK-LE-NEXT: mr r1, r30 diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll --- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll @@ -103,7 +103,6 @@ ; CHECK-LE-LABEL: f2: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: li r0, 16 ; CHECK-LE-NEXT: mtctr r0 ; CHECK-LE-NEXT: .LBB2_1: # %entry @@ -111,6 +110,7 @@ ; CHECK-LE-NEXT: stdu r12, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB2_1 ; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -120,7 +120,6 @@ ; CHECK-BE-LABEL: f2: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: li r0, 16 ; CHECK-BE-NEXT: mtctr r0 ; CHECK-BE-NEXT: .LBB2_1: # %entry @@ -128,6 +127,7 @@ ; CHECK-BE-NEXT: stdu r12, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB2_1 ; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -137,7 +137,6 @@ ; CHECK-32-LABEL: f2: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: li r0, 16 ; CHECK-32-NEXT: mtctr r0 ; CHECK-32-NEXT: .LBB2_1: # %entry @@ -145,6 +144,7 @@ ; CHECK-32-NEXT: stwu r12, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB2_1 ; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: stwu 
r12, -16(r1) ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 @@ -167,9 +167,9 @@ ; CHECK-LE-LABEL: f3: ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: stdu r12, -32768(r1) ; CHECK-LE-NEXT: stdu r12, -32768(r1) +; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -179,9 +179,9 @@ ; CHECK-BE-LABEL: f3: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: stdu r12, -32768(r1) ; CHECK-BE-NEXT: stdu r12, -32768(r1) +; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -191,9 +191,9 @@ ; CHECK-32-LABEL: f3: ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: stwu r12, -32768(r1) ; CHECK-32-NEXT: stwu r12, -32768(r1) +; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 @@ -261,9 +261,8 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind { ; CHECK-LE-LABEL: f5: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: li r0, 16 +; CHECK-LE-NEXT: mr r12, r1 ; CHECK-LE-NEXT: mtctr r0 ; CHECK-LE-NEXT: lis r0, -1 ; CHECK-LE-NEXT: nop @@ -272,6 +271,7 @@ ; CHECK-LE-NEXT: stdux r12, r1, r0 ; CHECK-LE-NEXT: bdnz .LBB5_1 ; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -280,17 +280,17 @@ ; ; CHECK-BE-LABEL: f5: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: li r0, 16 ; CHECK-BE-NEXT: mtctr r0 ; CHECK-BE-NEXT: lis r0, -1 +; CHECK-BE-NEXT: mr r12, r1 ; CHECK-BE-NEXT: nop ; CHECK-BE-NEXT: .LBB5_1: # %entry ; 
CHECK-BE-NEXT: # ; CHECK-BE-NEXT: stdux r12, r1, r0 ; CHECK-BE-NEXT: bdnz .LBB5_1 ; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -299,17 +299,17 @@ ; ; CHECK-32-LABEL: f5: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: li r0, 16 ; CHECK-32-NEXT: mtctr r0 ; CHECK-32-NEXT: lis r0, -1 +; CHECK-32-NEXT: mr r12, r1 ; CHECK-32-NEXT: nop ; CHECK-32-NEXT: .LBB5_1: # %entry ; CHECK-32-NEXT: # ; CHECK-32-NEXT: stwux r12, r1, r0 ; CHECK-32-NEXT: bdnz .LBB5_1 ; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 @@ -331,9 +331,8 @@ define i8 @f6() #0 nounwind { ; CHECK-LE-LABEL: f6: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: lis r0, 4 +; CHECK-LE-NEXT: mr r12, r1 ; CHECK-LE-NEXT: nop ; CHECK-LE-NEXT: mtctr r0 ; CHECK-LE-NEXT: .LBB6_1: # %entry @@ -341,6 +340,7 @@ ; CHECK-LE-NEXT: stdu r12, -4096(r1) ; CHECK-LE-NEXT: bdnz .LBB6_1 ; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) ; CHECK-LE-NEXT: lbz r3, 48(r1) @@ -349,9 +349,8 @@ ; ; CHECK-BE-LABEL: f6: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: lis r0, 4 +; CHECK-BE-NEXT: mr r12, r1 ; CHECK-BE-NEXT: nop ; CHECK-BE-NEXT: mtctr r0 ; CHECK-BE-NEXT: .LBB6_1: # %entry @@ -359,6 +358,7 @@ ; CHECK-BE-NEXT: stdu r12, -4096(r1) ; CHECK-BE-NEXT: bdnz .LBB6_1 ; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) ; CHECK-BE-NEXT: lbz r3, 64(r1) @@ -367,9 +367,8 @@ ; ; CHECK-32-LABEL: f6: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: stwu 
r12, -16(r1) ; CHECK-32-NEXT: lis r0, 4 +; CHECK-32-NEXT: mr r12, r1 ; CHECK-32-NEXT: nop ; CHECK-32-NEXT: mtctr r0 ; CHECK-32-NEXT: .LBB6_1: # %entry @@ -377,6 +376,7 @@ ; CHECK-32-NEXT: stwu r12, -4096(r1) ; CHECK-32-NEXT: bdnz .LBB6_1 ; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 @@ -398,11 +398,8 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind { ; CHECK-LE-LABEL: f7: ; CHECK-LE: # %bb.0: # %entry -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: ori r0, r0, 13776 -; CHECK-LE-NEXT: stdux r12, r1, r0 ; CHECK-LE-NEXT: li r0, 15258 +; CHECK-LE-NEXT: mr r12, r1 ; CHECK-LE-NEXT: mtctr r0 ; CHECK-LE-NEXT: lis r0, -1 ; CHECK-LE-NEXT: nop @@ -411,6 +408,9 @@ ; CHECK-LE-NEXT: stdux r12, r1, r0 ; CHECK-LE-NEXT: bdnz .LBB7_1 ; CHECK-LE-NEXT: # %bb.2: # %entry +; CHECK-LE-NEXT: lis r0, -1 +; CHECK-LE-NEXT: ori r0, r0, 13776 +; CHECK-LE-NEXT: stdux r12, r1, r0 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 41(r1) ; CHECK-LE-NEXT: lbz r3, 41(r1) @@ -419,19 +419,19 @@ ; ; CHECK-BE-LABEL: f7: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: mr r12, r1 -; CHECK-BE-NEXT: ori r0, r0, 13760 -; CHECK-BE-NEXT: stdux r12, r1, r0 ; CHECK-BE-NEXT: li r0, 15258 ; CHECK-BE-NEXT: mtctr r0 ; CHECK-BE-NEXT: lis r0, -1 +; CHECK-BE-NEXT: mr r12, r1 ; CHECK-BE-NEXT: nop ; CHECK-BE-NEXT: .LBB7_1: # %entry ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: stdux r12, r1, r0 ; CHECK-BE-NEXT: bdnz .LBB7_1 ; CHECK-BE-NEXT: # %bb.2: # %entry +; CHECK-BE-NEXT: lis r0, -1 +; CHECK-BE-NEXT: ori r0, r0, 13760 +; CHECK-BE-NEXT: stdux r12, r1, r0 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 57(r1) ; CHECK-BE-NEXT: lbz r3, 57(r1) @@ -440,19 +440,19 @@ ; ; CHECK-32-LABEL: f7: ; CHECK-32: # %bb.0: # %entry -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: ori r0, r0, 13808 -; CHECK-32-NEXT: stwux r12, r1, r0 ; 
CHECK-32-NEXT: li r0, 15258 ; CHECK-32-NEXT: mtctr r0 ; CHECK-32-NEXT: lis r0, -1 +; CHECK-32-NEXT: mr r12, r1 ; CHECK-32-NEXT: nop ; CHECK-32-NEXT: .LBB7_1: # %entry ; CHECK-32-NEXT: # ; CHECK-32-NEXT: stwux r12, r1, r0 ; CHECK-32-NEXT: bdnz .LBB7_1 ; CHECK-32-NEXT: # %bb.2: # %entry +; CHECK-32-NEXT: lis r0, -1 +; CHECK-32-NEXT: ori r0, r0, 13808 +; CHECK-32-NEXT: stwux r12, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: li r3, 3 ; CHECK-32-NEXT: sub r0, r1, r0 diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll --- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll @@ -113,7 +113,6 @@ ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 ; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: li r0, 16 ; CHECK-LE-NEXT: mtctr r0 ; CHECK-LE-NEXT: .LBB2_1: # %entry @@ -122,6 +121,7 @@ ; CHECK-LE-NEXT: bdnz .LBB2_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: .cfi_def_cfa_offset 65584 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) @@ -133,7 +133,6 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 ; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: li r0, 16 ; CHECK-BE-NEXT: mtctr r0 ; CHECK-BE-NEXT: .LBB2_1: # %entry @@ -142,6 +141,7 @@ ; CHECK-BE-NEXT: bdnz .LBB2_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: .cfi_def_cfa_offset 65600 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) @@ -153,7 +153,6 @@ ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 ; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: li r0, 16 ; CHECK-32-NEXT: mtctr r0 ; CHECK-32-NEXT: .LBB2_1: # %entry @@ -162,6 +161,7 @@ ; CHECK-32-NEXT: bdnz .LBB2_1 
; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 65552 @@ -186,9 +186,9 @@ ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 ; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: stdu r12, -32768(r1) ; CHECK-LE-NEXT: stdu r12, -32768(r1) +; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 ; CHECK-LE-NEXT: .cfi_def_cfa_offset 65584 ; CHECK-LE-NEXT: li r3, 3 @@ -201,9 +201,9 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 ; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: stdu r12, -32768(r1) ; CHECK-BE-NEXT: stdu r12, -32768(r1) +; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 ; CHECK-BE-NEXT: .cfi_def_cfa_offset 65600 ; CHECK-BE-NEXT: li r3, 3 @@ -216,9 +216,9 @@ ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 ; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: stwu r12, -32768(r1) ; CHECK-32-NEXT: stwu r12, -32768(r1) +; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: .cfi_def_cfa_register r1 ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: sub r0, r1, r0 @@ -293,7 +293,6 @@ ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 ; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: li r0, 16 ; CHECK-LE-NEXT: mtctr r0 ; CHECK-LE-NEXT: lis r0, -1 @@ -304,6 +303,7 @@ ; CHECK-LE-NEXT: bdnz .LBB5_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: .cfi_def_cfa_offset 1048624 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) @@ -315,7 +315,6 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 ; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) ; 
CHECK-BE-NEXT: li r0, 16 ; CHECK-BE-NEXT: mtctr r0 ; CHECK-BE-NEXT: lis r0, -1 @@ -326,6 +325,7 @@ ; CHECK-BE-NEXT: bdnz .LBB5_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: .cfi_def_cfa_offset 1048640 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) @@ -337,7 +337,6 @@ ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 ; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: li r0, 16 ; CHECK-32-NEXT: mtctr r0 ; CHECK-32-NEXT: lis r0, -1 @@ -348,6 +347,7 @@ ; CHECK-32-NEXT: bdnz .LBB5_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1048592 @@ -372,7 +372,6 @@ ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 ; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: lis r0, 4 ; CHECK-LE-NEXT: nop ; CHECK-LE-NEXT: mtctr r0 @@ -382,6 +381,7 @@ ; CHECK-LE-NEXT: bdnz .LBB6_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: stdu r12, -48(r1) ; CHECK-LE-NEXT: .cfi_def_cfa_offset 1073741872 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 48(r1) @@ -393,7 +393,6 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 ; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: lis r0, 4 ; CHECK-BE-NEXT: nop ; CHECK-BE-NEXT: mtctr r0 @@ -403,6 +402,7 @@ ; CHECK-BE-NEXT: bdnz .LBB6_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: stdu r12, -64(r1) ; CHECK-BE-NEXT: .cfi_def_cfa_offset 1073741888 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 64(r1) @@ -414,7 +414,6 @@ ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 ; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: stwu r12, -16(r1) ; 
CHECK-32-NEXT: lis r0, 4 ; CHECK-32-NEXT: nop ; CHECK-32-NEXT: mtctr r0 @@ -424,6 +423,7 @@ ; CHECK-32-NEXT: bdnz .LBB6_1 ; CHECK-32-NEXT: # %bb.2: # %entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: stwu r12, -16(r1) ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1073741840 @@ -448,9 +448,6 @@ ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: mr r12, r1 ; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: lis r0, -1 -; CHECK-LE-NEXT: ori r0, r0, 13776 -; CHECK-LE-NEXT: stdux r12, r1, r0 ; CHECK-LE-NEXT: li r0, 15258 ; CHECK-LE-NEXT: mtctr r0 ; CHECK-LE-NEXT: lis r0, -1 @@ -461,6 +458,9 @@ ; CHECK-LE-NEXT: bdnz .LBB7_1 ; CHECK-LE-NEXT: # %bb.2: # %entry ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: lis r0, -1 +; CHECK-LE-NEXT: ori r0, r0, 13776 +; CHECK-LE-NEXT: stdux r12, r1, r0 ; CHECK-LE-NEXT: .cfi_def_cfa_offset 1000000048 ; CHECK-LE-NEXT: li r3, 3 ; CHECK-LE-NEXT: stb r3, 41(r1) @@ -472,9 +472,6 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mr r12, r1 ; CHECK-BE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-BE-NEXT: lis r0, -1 -; CHECK-BE-NEXT: ori r0, r0, 13760 -; CHECK-BE-NEXT: stdux r12, r1, r0 ; CHECK-BE-NEXT: li r0, 15258 ; CHECK-BE-NEXT: mtctr r0 ; CHECK-BE-NEXT: lis r0, -1 @@ -485,6 +482,9 @@ ; CHECK-BE-NEXT: bdnz .LBB7_1 ; CHECK-BE-NEXT: # %bb.2: # %entry ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: lis r0, -1 +; CHECK-BE-NEXT: ori r0, r0, 13760 +; CHECK-BE-NEXT: stdux r12, r1, r0 ; CHECK-BE-NEXT: .cfi_def_cfa_offset 1000000064 ; CHECK-BE-NEXT: li r3, 3 ; CHECK-BE-NEXT: stb r3, 57(r1) @@ -496,9 +496,6 @@ ; CHECK-32: # %bb.0: # %entry ; CHECK-32-NEXT: mr r12, r1 ; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: lis r0, -1 -; CHECK-32-NEXT: ori r0, r0, 13808 -; CHECK-32-NEXT: stwux r12, r1, r0 ; CHECK-32-NEXT: li r0, 15258 ; CHECK-32-NEXT: mtctr r0 ; CHECK-32-NEXT: lis r0, -1 @@ -509,6 +506,9 @@ ; CHECK-32-NEXT: bdnz .LBB7_1 ; CHECK-32-NEXT: # %bb.2: # 
%entry ; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: lis r0, -1 +; CHECK-32-NEXT: ori r0, r0, 13808 +; CHECK-32-NEXT: stwux r12, r1, r0 ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: .cfi_def_cfa_offset 1000000016 @@ -599,31 +599,22 @@ define i32 @f9(i64 %i) local_unnamed_addr #0 { ; CHECK-LE-LABEL: f9: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r0, r1, 53 ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 53 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB9_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: .LBB9_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB9_4 +; CHECK-LE-NEXT: sub r12, r1, r0 +; CHECK-LE-NEXT: addi r12, r12, -10240 +; CHECK-LE-NEXT: .LBB9_1: +; CHECK-LE-NEXT: sub r0, r12, r1 +; CHECK-LE-NEXT: cmpdi r0, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB9_3 +; CHECK-LE-NEXT: # %bb.2: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: b .LBB9_1 ; CHECK-LE-NEXT: .LBB9_3: ; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB9_3 -; CHECK-LE-NEXT: .LBB9_4: ; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -2048(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_register r12 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r30, -16 ; CHECK-LE-NEXT: addi r4, r1, 2048 @@ -637,31 +628,22 @@ ; ; CHECK-BE-LABEL: f9: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r0, r1, 53 ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 53 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi 
r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB9_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: .LBB9_2: -; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB9_4 +; CHECK-BE-NEXT: sub r12, r1, r0 +; CHECK-BE-NEXT: addi r12, r12, -10240 +; CHECK-BE-NEXT: .LBB9_1: +; CHECK-BE-NEXT: sub r0, r12, r1 +; CHECK-BE-NEXT: cmpdi r0, -4096 +; CHECK-BE-NEXT: bge cr0, .LBB9_3 +; CHECK-BE-NEXT: # %bb.2: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: b .LBB9_1 ; CHECK-BE-NEXT: .LBB9_3: ; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: bne cr0, .LBB9_3 -; CHECK-BE-NEXT: .LBB9_4: ; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: stdu r12, -2048(r1) -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_register r12 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r30, -16 ; CHECK-BE-NEXT: addi r4, r1, 2048 @@ -675,14 +657,21 @@ ; ; CHECK-32-LABEL: f9: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: clrlwi r0, r12, 21 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: stwu r12, -2048(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: clrlwi r0, r1, 21 +; CHECK-32-NEXT: sub r12, r1, r0 +; CHECK-32-NEXT: addi r12, r12, -10240 +; CHECK-32-NEXT: .LBB9_1: +; CHECK-32-NEXT: sub r0, r12, r1 +; CHECK-32-NEXT: cmpwi r0, -4096 +; CHECK-32-NEXT: bge cr0, .LBB9_3 +; CHECK-32-NEXT: # %bb.2: +; CHECK-32-NEXT: lwz r0, 0(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: b .LBB9_1 +; CHECK-32-NEXT: .LBB9_3: +; CHECK-32-NEXT: lwz r12, 0(r1) +; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: .cfi_def_cfa_register r12 ; CHECK-32-NEXT: sub r0, r1, r12 ; 
CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -8 @@ -712,30 +701,22 @@ define i32 @f10(i64 %i) local_unnamed_addr #0 { ; CHECK-LE-LABEL: f10: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r0, r1, 54 ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 54 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB10_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: .LBB10_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB10_4 +; CHECK-LE-NEXT: sub r12, r1, r0 +; CHECK-LE-NEXT: addi r12, r12, -5120 +; CHECK-LE-NEXT: .LBB10_1: +; CHECK-LE-NEXT: sub r0, r12, r1 +; CHECK-LE-NEXT: cmpdi r0, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB10_3 +; CHECK-LE-NEXT: # %bb.2: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: b .LBB10_1 ; CHECK-LE-NEXT: .LBB10_3: ; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB10_3 -; CHECK-LE-NEXT: .LBB10_4: ; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: stdu r12, -1024(r1) -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_register r12 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r30, -16 ; CHECK-LE-NEXT: addi r4, r1, 1024 @@ -749,30 +730,22 @@ ; ; CHECK-BE-LABEL: f10: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r0, r1, 54 ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 54 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB10_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: .LBB10_2: -; CHECK-BE-NEXT: li r0, 
-4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB10_4 +; CHECK-BE-NEXT: sub r12, r1, r0 +; CHECK-BE-NEXT: addi r12, r12, -5120 +; CHECK-BE-NEXT: .LBB10_1: +; CHECK-BE-NEXT: sub r0, r12, r1 +; CHECK-BE-NEXT: cmpdi r0, -4096 +; CHECK-BE-NEXT: bge cr0, .LBB10_3 +; CHECK-BE-NEXT: # %bb.2: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: b .LBB10_1 ; CHECK-BE-NEXT: .LBB10_3: ; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: bne cr0, .LBB10_3 -; CHECK-BE-NEXT: .LBB10_4: ; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: stdu r12, -1024(r1) -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_register r12 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r30, -16 ; CHECK-BE-NEXT: addi r4, r1, 1024 @@ -786,13 +759,21 @@ ; ; CHECK-32-LABEL: f10: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: clrlwi r0, r12, 22 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: stwu r12, -1024(r1) -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: clrlwi r0, r1, 22 +; CHECK-32-NEXT: sub r12, r1, r0 +; CHECK-32-NEXT: addi r12, r12, -5120 +; CHECK-32-NEXT: .LBB10_1: +; CHECK-32-NEXT: sub r0, r12, r1 +; CHECK-32-NEXT: cmpwi r0, -4096 +; CHECK-32-NEXT: bge cr0, .LBB10_3 +; CHECK-32-NEXT: # %bb.2: +; CHECK-32-NEXT: lwz r0, 0(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: b .LBB10_1 +; CHECK-32-NEXT: .LBB10_3: +; CHECK-32-NEXT: lwz r12, 0(r1) +; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: .cfi_def_cfa_register r12 ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -8 @@ -821,35 +802,25 @@ define void @f11(i32 %vla_size, i64 %i) #0 { ; CHECK-LE-LABEL: f11: ; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r0, r1, 49 ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: 
mr r30, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-LE-NEXT: clrldi r0, r30, 49 -; CHECK-LE-NEXT: subc r12, r30, r0 -; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB11_2 -; CHECK-LE-NEXT: # %bb.1: -; CHECK-LE-NEXT: neg r0, r0 -; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: .LBB11_2: -; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB11_4 +; CHECK-LE-NEXT: sub r12, r1, r0 +; CHECK-LE-NEXT: lis r0, -2 +; CHECK-LE-NEXT: ori r0, r0, 32768 +; CHECK-LE-NEXT: add r12, r0, r12 +; CHECK-LE-NEXT: .LBB11_1: +; CHECK-LE-NEXT: sub r0, r12, r1 +; CHECK-LE-NEXT: cmpdi r0, -4096 +; CHECK-LE-NEXT: bge cr0, .LBB11_3 +; CHECK-LE-NEXT: # %bb.2: +; CHECK-LE-NEXT: stdu r30, -4096(r1) +; CHECK-LE-NEXT: b .LBB11_1 ; CHECK-LE-NEXT: .LBB11_3: ; CHECK-LE-NEXT: stdux r30, r1, r0 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: bne cr0, .LBB11_3 -; CHECK-LE-NEXT: .LBB11_4: ; CHECK-LE-NEXT: mr r12, r30 -; CHECK-LE-NEXT: li r0, 24 -; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: .LBB11_5: -; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: bdnz .LBB11_5 -; CHECK-LE-NEXT: # %bb.6: -; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_register r12 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r31, -8 ; CHECK-LE-NEXT: .cfi_offset r30, -16 @@ -876,12 +847,12 @@ ; CHECK-LE-NEXT: add r4, r1, r7 ; CHECK-LE-NEXT: stdux r3, r1, r5 ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: beq cr0, .LBB11_8 -; CHECK-LE-NEXT: .LBB11_7: +; CHECK-LE-NEXT: beq cr0, .LBB11_5 +; CHECK-LE-NEXT: .LBB11_4: ; CHECK-LE-NEXT: stdu r3, -4096(r1) ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: bne cr0, .LBB11_7 -; CHECK-LE-NEXT: .LBB11_8: +; CHECK-LE-NEXT: bne cr0, .LBB11_4 +; CHECK-LE-NEXT: .LBB11_5: ; CHECK-LE-NEXT: addi r3, r1, -32768 ; CHECK-LE-NEXT: lbz r3, 0(r3) ; CHECK-LE-NEXT: mr r1, r30 @@ -891,35 +862,25 @@ ; ; CHECK-BE-LABEL: f11: ; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi 
r0, r1, 49 ; CHECK-BE-NEXT: std r31, -8(r1) ; CHECK-BE-NEXT: std r30, -16(r1) ; CHECK-BE-NEXT: mr r30, r1 -; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 -; CHECK-BE-NEXT: clrldi r0, r30, 49 -; CHECK-BE-NEXT: subc r12, r30, r0 -; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB11_2 -; CHECK-BE-NEXT: # %bb.1: -; CHECK-BE-NEXT: neg r0, r0 -; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: .LBB11_2: -; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB11_4 +; CHECK-BE-NEXT: sub r12, r1, r0 +; CHECK-BE-NEXT: lis r0, -2 +; CHECK-BE-NEXT: ori r0, r0, 32768 +; CHECK-BE-NEXT: add r12, r0, r12 +; CHECK-BE-NEXT: .LBB11_1: +; CHECK-BE-NEXT: sub r0, r12, r1 +; CHECK-BE-NEXT: cmpdi r0, -4096 +; CHECK-BE-NEXT: bge cr0, .LBB11_3 +; CHECK-BE-NEXT: # %bb.2: +; CHECK-BE-NEXT: stdu r30, -4096(r1) +; CHECK-BE-NEXT: b .LBB11_1 ; CHECK-BE-NEXT: .LBB11_3: ; CHECK-BE-NEXT: stdux r30, r1, r0 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: bne cr0, .LBB11_3 -; CHECK-BE-NEXT: .LBB11_4: ; CHECK-BE-NEXT: mr r12, r30 -; CHECK-BE-NEXT: li r0, 24 -; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: .LBB11_5: -; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: bdnz .LBB11_5 -; CHECK-BE-NEXT: # %bb.6: -; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_register r12 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r31, -8 ; CHECK-BE-NEXT: .cfi_offset r30, -16 @@ -946,12 +907,12 @@ ; CHECK-BE-NEXT: add r4, r1, r7 ; CHECK-BE-NEXT: stdux r3, r1, r5 ; CHECK-BE-NEXT: cmpd r1, r4 -; CHECK-BE-NEXT: beq cr0, .LBB11_8 -; CHECK-BE-NEXT: .LBB11_7: +; CHECK-BE-NEXT: beq cr0, .LBB11_5 +; CHECK-BE-NEXT: .LBB11_4: ; CHECK-BE-NEXT: stdu r3, -4096(r1) ; CHECK-BE-NEXT: cmpd r1, r4 -; CHECK-BE-NEXT: bne cr0, .LBB11_7 -; CHECK-BE-NEXT: .LBB11_8: +; CHECK-BE-NEXT: bne cr0, .LBB11_4 +; CHECK-BE-NEXT: .LBB11_5: ; CHECK-BE-NEXT: addi r3, r1, -32768 ; CHECK-BE-NEXT: lbz r3, 0(r3) ; CHECK-BE-NEXT: mr r1, 
r30 @@ -961,17 +922,23 @@ ; ; CHECK-32-LABEL: f11: ; CHECK-32: # %bb.0: -; CHECK-32-NEXT: mr r12, r1 -; CHECK-32-NEXT: .cfi_def_cfa r12, 0 -; CHECK-32-NEXT: clrlwi r0, r12, 17 -; CHECK-32-NEXT: subc r1, r1, r0 -; CHECK-32-NEXT: li r0, 24 -; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: clrlwi r0, r1, 17 +; CHECK-32-NEXT: sub r12, r1, r0 +; CHECK-32-NEXT: lis r0, -2 +; CHECK-32-NEXT: ori r0, r0, 32768 +; CHECK-32-NEXT: add r12, r0, r12 ; CHECK-32-NEXT: .LBB11_1: -; CHECK-32-NEXT: stwu r12, -4096(r1) -; CHECK-32-NEXT: bdnz .LBB11_1 +; CHECK-32-NEXT: sub r0, r12, r1 +; CHECK-32-NEXT: cmpwi r0, -4096 +; CHECK-32-NEXT: bge cr0, .LBB11_3 ; CHECK-32-NEXT: # %bb.2: -; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: lwz r0, 0(r1) +; CHECK-32-NEXT: stwu r0, -4096(r1) +; CHECK-32-NEXT: b .LBB11_1 +; CHECK-32-NEXT: .LBB11_3: +; CHECK-32-NEXT: lwz r12, 0(r1) +; CHECK-32-NEXT: stwux r12, r1, r0 +; CHECK-32-NEXT: .cfi_def_cfa_register r12 ; CHECK-32-NEXT: sub r0, r1, r12 ; CHECK-32-NEXT: sub r0, r1, r0 ; CHECK-32-NEXT: addic r0, r0, -4 @@ -1003,12 +970,12 @@ ; CHECK-32-NEXT: add r4, r1, r7 ; CHECK-32-NEXT: stwux r3, r1, r5 ; CHECK-32-NEXT: cmpw r1, r4 -; CHECK-32-NEXT: beq cr0, .LBB11_4 -; CHECK-32-NEXT: .LBB11_3: +; CHECK-32-NEXT: beq cr0, .LBB11_5 +; CHECK-32-NEXT: .LBB11_4: ; CHECK-32-NEXT: stwu r3, -4096(r1) ; CHECK-32-NEXT: cmpw r1, r4 -; CHECK-32-NEXT: bne cr0, .LBB11_3 -; CHECK-32-NEXT: .LBB11_4: +; CHECK-32-NEXT: bne cr0, .LBB11_4 +; CHECK-32-NEXT: .LBB11_5: ; CHECK-32-NEXT: addi r3, r1, -32768 ; CHECK-32-NEXT: lbz r3, 0(r3) ; CHECK-32-NEXT: lwz r31, 0(r1)