diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -1187,7 +1187,6 @@ void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologMBB) const { - // TODO: Generate CFI instructions. bool isPPC64 = Subtarget.isPPC64(); const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); @@ -1283,7 +1282,7 @@ // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized // as negprobesize. At both stages, TempReg stores the value of // (stackptr - (stackptr % align)). - auto dynamicProbe = [&](MachineBasicBlock &MBB, + auto ProbeRealignGap = [&](MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register ScratchReg, Register TempReg) { assert(HasBP && isPPC64 && "Probe alignment part not available"); @@ -1293,8 +1292,20 @@ .addReg(BPReg) .addImm(0) .addImm(64 - Log2(MaxAlign)); + // When MaxAlign <= Subtarget.getRedZoneSize(), we already have stored BP + // in the redzone of the stack which is considered to be a free probe, so + // we don't need to perform intentional probe at such situation. + const unsigned RedZoneSize = Subtarget.getRedZoneSize(); + assert(ProbeSize >= RedZoneSize && + "ProbeSize should be larger than size of redzone"); + if (MaxAlign <= RedZoneSize) { + BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBF8), SPReg) + .addReg(ScratchReg) + .addReg(BPReg); + return &MBB; + } // TempReg = stackptr - (stackptr % align) - BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg) + BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBF8), TempReg) .addReg(ScratchReg) .addReg(BPReg); // ScratchReg = (stackptr % align) % probesize @@ -1303,26 +1314,32 @@ .addImm(0) .addImm(64 - Log2(ProbeSize)); Register CRReg = PPC::CR0; - // If (stackptr % align) % probesize == 0, we should not generate probe - // code. 
Layout of output assembly kinda like: + // If (stackptr % align) % probesize < redzonesize, we don't need to + // generate probe code. + // Layout of output assembly kinda like: // bb.0: // ... - // cmpldi $scratchreg, 0 - // beq bb.2 - // bb.1: # Probe tail interval + // cmpdi $scratchreg, <redzonesize> + // bge bb.2 + // bb.1: # Skip tail interval < redzonesize. + // sub r1, r1, $scratchreg + // b bb.3 + // bb.2: # Probe tail interval >= redzonesize. // neg $scratchreg, $scratchreg // stdux $bpreg, r1, $scratchreg - // bb.2: + // bb.3: // <materialize negprobesize into $scratchreg> // cmpd r1, $tempreg - // beq bb.4 - // bb.3: # Loop to probe each block + // beq bb.5 + // bb.4: # Loop to probe each block // stdux $bpreg, r1, $scratchreg // cmpd r1, $tempreg - // bne bb.3 - // bb.4: + // bne bb.4 + // bb.5: // ... MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); + MachineBasicBlock *SkipResidualMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, SkipResidualMBB); MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeResidualMBB); MachineBasicBlock *ProbeLoopPreHeaderMBB = @@ -1332,24 +1349,32 @@ MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ProbeExitMBB); - // bb.4 + // bb.5 ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); // bb.0 - BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0); + BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg) + .addReg(ScratchReg) + .addImm(RedZoneSize); BuildMI(&MBB, DL, TII.get(PPC::BCC)) - .addImm(PPC::PRED_EQ) + .addImm(PPC::PRED_GE) .addReg(CRReg) - .addMBB(ProbeLoopPreHeaderMBB); + .addMBB(ProbeResidualMBB); MBB.addSuccessor(ProbeResidualMBB); - MBB.addSuccessor(ProbeLoopPreHeaderMBB); + MBB.addSuccessor(SkipResidualMBB); // bb.1 + BuildMI(SkipResidualMBB, DL, TII.get(PPC::SUBF8), SPReg) + .addReg(ScratchReg) 
+ .addReg(SPReg); + BuildMI(SkipResidualMBB, DL, TII.get(PPC::B)).addMBB(ProbeLoopPreHeaderMBB); + SkipResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB); + // bb.2 BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg) .addReg(ScratchReg); allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg, false, BPReg); ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB); - // bb.2 + // bb.3 MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(), NegProbeSize, ScratchReg); BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg) @@ -1361,7 +1386,7 @@ .addMBB(ProbeExitMBB); ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB); ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB); - // bb.3 + // bb.4 allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg, false, BPReg); BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg) @@ -1374,6 +1399,7 @@ ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); // Update liveins. + recomputeLiveIns(*SkipResidualMBB); recomputeLiveIns(*ProbeResidualMBB); recomputeLiveIns(*ProbeLoopPreHeaderMBB); recomputeLiveIns(*ProbeLoopBodyMBB); @@ -1389,10 +1415,10 @@ // Use BPReg to calculate CFA. if (needsCFI) buildDefCFA(*CurrentMBB, {MI}, BPReg, 0); - // Since we have SPReg copied to BPReg at the moment, FPReg can be used as - // TempReg. + // Since we have SPReg copied to BPReg at the moment, FPReg can be used + // as TempReg. Register TempReg = FPReg; - CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg); + CurrentMBB = ProbeRealignGap(*CurrentMBB, {MI}, ScratchReg, TempReg); // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64. 
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) .addReg(BPReg) diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll --- a/llvm/test/CodeGen/PowerPC/pr46759.ll +++ b/llvm/test/CodeGen/PowerPC/pr46759.ll @@ -11,23 +11,28 @@ ; CHECK-LE-NEXT: mr r30, r1 ; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 ; CHECK-LE-NEXT: clrldi r0, r30, 53 -; CHECK-LE-NEXT: subc r12, r30, r0 +; CHECK-LE-NEXT: sub r12, r30, r0 ; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB0_2 +; CHECK-LE-NEXT: cmpdi r0, 288 +; CHECK-LE-NEXT: blt cr0, .LBB0_2 ; CHECK-LE-NEXT: # %bb.1: # %entry ; CHECK-LE-NEXT: neg r0, r0 ; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: li r0, -4096 +; CHECK-LE-NEXT: b .LBB0_4 ; CHECK-LE-NEXT: .LBB0_2: # %entry +; CHECK-LE-NEXT: sub r1, r1, r0 ; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB0_4 +; CHECK-LE-NEXT: b .LBB0_4 +; CHECK-LE-NEXT: .p2align 4 ; CHECK-LE-NEXT: .LBB0_3: # %entry ; CHECK-LE-NEXT: # ; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: .LBB0_4: # %entry +; CHECK-LE-NEXT: # ; CHECK-LE-NEXT: cmpd r1, r12 ; CHECK-LE-NEXT: bne cr0, .LBB0_3 -; CHECK-LE-NEXT: .LBB0_4: # %entry +; CHECK-LE-NEXT: # %bb.5: # %entry ; CHECK-LE-NEXT: mr r12, r30 ; CHECK-LE-NEXT: stdu r12, -2048(r1) ; CHECK-LE-NEXT: stdu r12, -4096(r1) @@ -52,13 +57,13 @@ ; CHECK-LE-NEXT: add r4, r1, r4 ; CHECK-LE-NEXT: stdux r3, r1, r5 ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: beq cr0, .LBB0_6 -; CHECK-LE-NEXT: .LBB0_5: # %entry +; CHECK-LE-NEXT: beq cr0, .LBB0_7 +; CHECK-LE-NEXT: .LBB0_6: # %entry ; CHECK-LE-NEXT: # ; CHECK-LE-NEXT: stdu r3, -4096(r1) ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: bne cr0, .LBB0_5 -; CHECK-LE-NEXT: .LBB0_6: # %entry +; CHECK-LE-NEXT: bne cr0, .LBB0_6 +; CHECK-LE-NEXT: .LBB0_7: # %entry ; CHECK-LE-NEXT: addi r3, r1, 2048 ; CHECK-LE-NEXT: lbz r3, 0(r3) ; CHECK-LE-NEXT: mr r1, r30 diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll 
b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll --- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll @@ -603,22 +603,26 @@ ; CHECK-LE-NEXT: mr r30, r1 ; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 ; CHECK-LE-NEXT: clrldi r0, r30, 53 -; CHECK-LE-NEXT: subc r12, r30, r0 +; CHECK-LE-NEXT: sub r12, r30, r0 ; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB9_2 +; CHECK-LE-NEXT: cmpdi r0, 288 +; CHECK-LE-NEXT: blt cr0, .LBB9_2 ; CHECK-LE-NEXT: # %bb.1: ; CHECK-LE-NEXT: neg r0, r0 ; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: li r0, -4096 +; CHECK-LE-NEXT: b .LBB9_4 ; CHECK-LE-NEXT: .LBB9_2: +; CHECK-LE-NEXT: sub r1, r1, r0 ; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB9_4 +; CHECK-LE-NEXT: b .LBB9_4 +; CHECK-LE-NEXT: .p2align 4 ; CHECK-LE-NEXT: .LBB9_3: ; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: .LBB9_4: ; CHECK-LE-NEXT: cmpd r1, r12 ; CHECK-LE-NEXT: bne cr0, .LBB9_3 -; CHECK-LE-NEXT: .LBB9_4: +; CHECK-LE-NEXT: # %bb.5: ; CHECK-LE-NEXT: mr r12, r30 ; CHECK-LE-NEXT: stdu r12, -2048(r1) ; CHECK-LE-NEXT: stdu r12, -4096(r1) @@ -641,22 +645,25 @@ ; CHECK-BE-NEXT: mr r30, r1 ; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 ; CHECK-BE-NEXT: clrldi r0, r30, 53 -; CHECK-BE-NEXT: subc r12, r30, r0 +; CHECK-BE-NEXT: sub r12, r30, r0 ; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB9_2 +; CHECK-BE-NEXT: cmpdi r0, 288 +; CHECK-BE-NEXT: blt cr0, .LBB9_2 ; CHECK-BE-NEXT: # %bb.1: ; CHECK-BE-NEXT: neg r0, r0 ; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: li r0, -4096 +; CHECK-BE-NEXT: b .LBB9_4 ; CHECK-BE-NEXT: .LBB9_2: +; CHECK-BE-NEXT: sub r1, r1, r0 ; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB9_4 +; CHECK-BE-NEXT: b .LBB9_4 ; CHECK-BE-NEXT: .LBB9_3: ; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: .LBB9_4: ; CHECK-BE-NEXT: cmpd r1, 
r12 ; CHECK-BE-NEXT: bne cr0, .LBB9_3 -; CHECK-BE-NEXT: .LBB9_4: +; CHECK-BE-NEXT: # %bb.5: ; CHECK-BE-NEXT: mr r12, r30 ; CHECK-BE-NEXT: stdu r12, -2048(r1) ; CHECK-BE-NEXT: stdu r12, -4096(r1) @@ -716,22 +723,26 @@ ; CHECK-LE-NEXT: mr r30, r1 ; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 ; CHECK-LE-NEXT: clrldi r0, r30, 54 -; CHECK-LE-NEXT: subc r12, r30, r0 +; CHECK-LE-NEXT: sub r12, r30, r0 ; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB10_2 +; CHECK-LE-NEXT: cmpdi r0, 288 +; CHECK-LE-NEXT: blt cr0, .LBB10_2 ; CHECK-LE-NEXT: # %bb.1: ; CHECK-LE-NEXT: neg r0, r0 ; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: li r0, -4096 +; CHECK-LE-NEXT: b .LBB10_4 ; CHECK-LE-NEXT: .LBB10_2: +; CHECK-LE-NEXT: sub r1, r1, r0 ; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB10_4 +; CHECK-LE-NEXT: b .LBB10_4 +; CHECK-LE-NEXT: .p2align 4 ; CHECK-LE-NEXT: .LBB10_3: ; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: .LBB10_4: ; CHECK-LE-NEXT: cmpd r1, r12 ; CHECK-LE-NEXT: bne cr0, .LBB10_3 -; CHECK-LE-NEXT: .LBB10_4: +; CHECK-LE-NEXT: # %bb.5: ; CHECK-LE-NEXT: mr r12, r30 ; CHECK-LE-NEXT: stdu r12, -1024(r1) ; CHECK-LE-NEXT: stdu r12, -4096(r1) @@ -753,22 +764,25 @@ ; CHECK-BE-NEXT: mr r30, r1 ; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 ; CHECK-BE-NEXT: clrldi r0, r30, 54 -; CHECK-BE-NEXT: subc r12, r30, r0 +; CHECK-BE-NEXT: sub r12, r30, r0 ; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB10_2 +; CHECK-BE-NEXT: cmpdi r0, 288 +; CHECK-BE-NEXT: blt cr0, .LBB10_2 ; CHECK-BE-NEXT: # %bb.1: ; CHECK-BE-NEXT: neg r0, r0 ; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: li r0, -4096 +; CHECK-BE-NEXT: b .LBB10_4 ; CHECK-BE-NEXT: .LBB10_2: +; CHECK-BE-NEXT: sub r1, r1, r0 ; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB10_4 +; CHECK-BE-NEXT: b .LBB10_4 ; CHECK-BE-NEXT: .LBB10_3: ; CHECK-BE-NEXT: stdux r30, r1, r0 +; 
CHECK-BE-NEXT: .LBB10_4: ; CHECK-BE-NEXT: cmpd r1, r12 ; CHECK-BE-NEXT: bne cr0, .LBB10_3 -; CHECK-BE-NEXT: .LBB10_4: +; CHECK-BE-NEXT: # %bb.5: ; CHECK-BE-NEXT: mr r12, r30 ; CHECK-BE-NEXT: stdu r12, -1024(r1) ; CHECK-BE-NEXT: stdu r12, -4096(r1) @@ -826,29 +840,33 @@ ; CHECK-LE-NEXT: mr r30, r1 ; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 ; CHECK-LE-NEXT: clrldi r0, r30, 49 -; CHECK-LE-NEXT: subc r12, r30, r0 +; CHECK-LE-NEXT: sub r12, r30, r0 ; CHECK-LE-NEXT: clrldi r0, r0, 52 -; CHECK-LE-NEXT: cmpdi r0, 0 -; CHECK-LE-NEXT: beq cr0, .LBB11_2 +; CHECK-LE-NEXT: cmpdi r0, 288 +; CHECK-LE-NEXT: blt cr0, .LBB11_2 ; CHECK-LE-NEXT: # %bb.1: ; CHECK-LE-NEXT: neg r0, r0 ; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: li r0, -4096 +; CHECK-LE-NEXT: b .LBB11_4 ; CHECK-LE-NEXT: .LBB11_2: +; CHECK-LE-NEXT: sub r1, r1, r0 ; CHECK-LE-NEXT: li r0, -4096 -; CHECK-LE-NEXT: cmpd r1, r12 -; CHECK-LE-NEXT: beq cr0, .LBB11_4 +; CHECK-LE-NEXT: b .LBB11_4 +; CHECK-LE-NEXT: .p2align 4 ; CHECK-LE-NEXT: .LBB11_3: ; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: .LBB11_4: ; CHECK-LE-NEXT: cmpd r1, r12 ; CHECK-LE-NEXT: bne cr0, .LBB11_3 -; CHECK-LE-NEXT: .LBB11_4: +; CHECK-LE-NEXT: # %bb.5: ; CHECK-LE-NEXT: mr r12, r30 ; CHECK-LE-NEXT: li r0, 24 ; CHECK-LE-NEXT: mtctr r0 -; CHECK-LE-NEXT: .LBB11_5: +; CHECK-LE-NEXT: .LBB11_6: ; CHECK-LE-NEXT: stdu r12, -4096(r1) -; CHECK-LE-NEXT: bdnz .LBB11_5 -; CHECK-LE-NEXT: # %bb.6: +; CHECK-LE-NEXT: bdnz .LBB11_6 +; CHECK-LE-NEXT: # %bb.7: ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 ; CHECK-LE-NEXT: .cfi_def_cfa_register r30 ; CHECK-LE-NEXT: .cfi_offset r31, -8 @@ -876,12 +894,12 @@ ; CHECK-LE-NEXT: add r4, r1, r7 ; CHECK-LE-NEXT: stdux r3, r1, r5 ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: beq cr0, .LBB11_8 -; CHECK-LE-NEXT: .LBB11_7: +; CHECK-LE-NEXT: beq cr0, .LBB11_9 +; CHECK-LE-NEXT: .LBB11_8: ; CHECK-LE-NEXT: stdu r3, -4096(r1) ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: bne cr0, .LBB11_7 -; CHECK-LE-NEXT: .LBB11_8: +; CHECK-LE-NEXT: bne 
cr0, .LBB11_8 +; CHECK-LE-NEXT: .LBB11_9: ; CHECK-LE-NEXT: addi r3, r1, -32768 ; CHECK-LE-NEXT: lbz r3, 0(r3) ; CHECK-LE-NEXT: mr r1, r30 @@ -896,29 +914,32 @@ ; CHECK-BE-NEXT: mr r30, r1 ; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 ; CHECK-BE-NEXT: clrldi r0, r30, 49 -; CHECK-BE-NEXT: subc r12, r30, r0 +; CHECK-BE-NEXT: sub r12, r30, r0 ; CHECK-BE-NEXT: clrldi r0, r0, 52 -; CHECK-BE-NEXT: cmpdi r0, 0 -; CHECK-BE-NEXT: beq cr0, .LBB11_2 +; CHECK-BE-NEXT: cmpdi r0, 288 +; CHECK-BE-NEXT: blt cr0, .LBB11_2 ; CHECK-BE-NEXT: # %bb.1: ; CHECK-BE-NEXT: neg r0, r0 ; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: li r0, -4096 +; CHECK-BE-NEXT: b .LBB11_4 ; CHECK-BE-NEXT: .LBB11_2: +; CHECK-BE-NEXT: sub r1, r1, r0 ; CHECK-BE-NEXT: li r0, -4096 -; CHECK-BE-NEXT: cmpd r1, r12 -; CHECK-BE-NEXT: beq cr0, .LBB11_4 +; CHECK-BE-NEXT: b .LBB11_4 ; CHECK-BE-NEXT: .LBB11_3: ; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: .LBB11_4: ; CHECK-BE-NEXT: cmpd r1, r12 ; CHECK-BE-NEXT: bne cr0, .LBB11_3 -; CHECK-BE-NEXT: .LBB11_4: +; CHECK-BE-NEXT: # %bb.5: ; CHECK-BE-NEXT: mr r12, r30 ; CHECK-BE-NEXT: li r0, 24 ; CHECK-BE-NEXT: mtctr r0 -; CHECK-BE-NEXT: .LBB11_5: +; CHECK-BE-NEXT: .LBB11_6: ; CHECK-BE-NEXT: stdu r12, -4096(r1) -; CHECK-BE-NEXT: bdnz .LBB11_5 -; CHECK-BE-NEXT: # %bb.6: +; CHECK-BE-NEXT: bdnz .LBB11_6 +; CHECK-BE-NEXT: # %bb.7: ; CHECK-BE-NEXT: .cfi_def_cfa_register r1 ; CHECK-BE-NEXT: .cfi_def_cfa_register r30 ; CHECK-BE-NEXT: .cfi_offset r31, -8 @@ -946,12 +967,12 @@ ; CHECK-BE-NEXT: add r4, r1, r7 ; CHECK-BE-NEXT: stdux r3, r1, r5 ; CHECK-BE-NEXT: cmpd r1, r4 -; CHECK-BE-NEXT: beq cr0, .LBB11_8 -; CHECK-BE-NEXT: .LBB11_7: +; CHECK-BE-NEXT: beq cr0, .LBB11_9 +; CHECK-BE-NEXT: .LBB11_8: ; CHECK-BE-NEXT: stdu r3, -4096(r1) ; CHECK-BE-NEXT: cmpd r1, r4 -; CHECK-BE-NEXT: bne cr0, .LBB11_7 -; CHECK-BE-NEXT: .LBB11_8: +; CHECK-BE-NEXT: bne cr0, .LBB11_8 +; CHECK-BE-NEXT: .LBB11_9: ; CHECK-BE-NEXT: addi r3, r1, -32768 ; CHECK-BE-NEXT: lbz r3, 0(r3) ; CHECK-BE-NEXT: mr r1, r30 @@ 
-1026,4 +1047,84 @@ ret void } +; align < redzonesize +define i32 @f12(i64 %i) local_unnamed_addr #0 { +; CHECK-LE-LABEL: f12: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: std r30, -16(r1) +; CHECK-LE-NEXT: mr r30, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 +; CHECK-LE-NEXT: clrldi r0, r30, 58 +; CHECK-LE-NEXT: sub r1, r30, r0 +; CHECK-LE-NEXT: mr r12, r30 +; CHECK-LE-NEXT: stdu r12, -4032(r1) +; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_register r30 +; CHECK-LE-NEXT: .cfi_offset r30, -16 +; CHECK-LE-NEXT: addi r4, r1, 64 +; CHECK-LE-NEXT: sldi r3, r3, 2 +; CHECK-LE-NEXT: li r5, 1 +; CHECK-LE-NEXT: stwx r5, r4, r3 +; CHECK-LE-NEXT: lwz r3, 64(r1) +; CHECK-LE-NEXT: mr r1, r30 +; CHECK-LE-NEXT: ld r30, -16(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f12: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: std r30, -16(r1) +; CHECK-BE-NEXT: mr r30, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 +; CHECK-BE-NEXT: clrldi r0, r30, 58 +; CHECK-BE-NEXT: sub r1, r30, r0 +; CHECK-BE-NEXT: mr r12, r30 +; CHECK-BE-NEXT: stdu r12, -4032(r1) +; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_register r30 +; CHECK-BE-NEXT: .cfi_offset r30, -16 +; CHECK-BE-NEXT: addi r4, r1, 64 +; CHECK-BE-NEXT: li r5, 1 +; CHECK-BE-NEXT: sldi r3, r3, 2 +; CHECK-BE-NEXT: stwx r5, r4, r3 +; CHECK-BE-NEXT: lwz r3, 64(r1) +; CHECK-BE-NEXT: mr r1, r30 +; CHECK-BE-NEXT: ld r30, -16(r1) +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f12: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: .cfi_def_cfa r12, 0 +; CHECK-32-NEXT: clrlwi r0, r12, 26 +; CHECK-32-NEXT: subc r1, r1, r0 +; CHECK-32-NEXT: stwu r12, -4032(r1) +; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: addic r0, r0, -8 +; CHECK-32-NEXT: stwx r30, 0, r0 +; CHECK-32-NEXT: addic r30, r0, 8 +; CHECK-32-NEXT: 
.cfi_def_cfa_register r30 +; CHECK-32-NEXT: .cfi_offset r30, -8 +; CHECK-32-NEXT: addi r3, r1, 64 +; CHECK-32-NEXT: li r5, 1 +; CHECK-32-NEXT: slwi r4, r4, 2 +; CHECK-32-NEXT: stwx r5, r3, r4 +; CHECK-32-NEXT: mr r0, r31 +; CHECK-32-NEXT: lwz r3, 64(r1) +; CHECK-32-NEXT: lwz r31, 0(r1) +; CHECK-32-NEXT: lwz r30, -8(r31) +; CHECK-32-NEXT: mr r1, r31 +; CHECK-32-NEXT: mr r31, r0 +; CHECK-32-NEXT: blr + %a = alloca i32, i32 2000, align 64 + %b = getelementptr inbounds i32, i32* %a, i64 %i + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + attributes #0 = { "probe-stack"="inline-asm" }