diff --git a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -525,6 +525,8 @@ // register is available, we can adjust for that by not overlapping the spill // code. However, if we need to realign the stack (i.e. have a base pointer) // and the stack frame is large, we need two scratch registers. +// Also, stack probe requires two scratch registers, one for old sp, one for +// large frame and large probe size. bool PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); @@ -536,8 +538,10 @@ MachineFrameInfo &MFI = MF.getFrameInfo(); Align MaxAlign = MFI.getMaxAlign(); bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); + const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); - return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; + return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) || + TLI.hasInlineStackProbe(MF); } bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { @@ -676,12 +680,8 @@ "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); // Using the same bool variable as below to suppress compiler warnings. - // Stack probe requires two scratch registers, one for old sp, one for large - // frame and large probe size. 
bool SingleScratchReg = findScratchRegister( - &MBB, false, - twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF), - &ScratchReg, &TempReg); + &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg); assert(SingleScratchReg && "Required number of registers not available in this block"); @@ -1202,10 +1202,12 @@ if (StackAllocMIPos == PrologMBB.end()) return; const BasicBlock *ProbedBB = PrologMBB.getBasicBlock(); + MachineBasicBlock *CurrentMBB = &PrologMBB; DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos); MachineInstr &MI = *StackAllocMIPos; int64_t NegFrameSize = MI.getOperand(2).getImm(); - int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF); + unsigned ProbeSize = TLI.getStackProbeSize(MF); + int64_t NegProbeSize = -(int64_t)ProbeSize; assert(isInt<32>(NegProbeSize) && "Unhandled probe size"); int64_t NumBlocks = NegFrameSize / NegProbeSize; int64_t NegResidualSize = NegFrameSize % NegProbeSize; @@ -1214,10 +1216,9 @@ Register FPReg = MI.getOperand(1).getReg(); const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); bool HasBP = RegInfo->hasBasePointer(MF); + Register BPReg = RegInfo->getBaseRegister(MF); Align MaxAlign = MFI.getMaxAlign(); - // Initialize current frame pointer. const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); - BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); // Subroutines to generate .cfi_* directives. auto buildDefCFAReg = [&](MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register Reg) { @@ -1257,89 +1258,218 @@ // Subroutine to store frame pointer and decrease stack pointer by probe size. auto allocateAndProbe = [&](MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, int64_t NegSize, - Register NegSizeReg, bool UseDForm) { + Register NegSizeReg, bool UseDForm, + Register StoreReg) { if (UseDForm) BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? 
PPC::STDU : PPC::STWU), SPReg) - .addReg(FPReg) + .addReg(StoreReg) .addImm(NegSize) .addReg(SPReg); else BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) - .addReg(FPReg) + .addReg(StoreReg) .addReg(SPReg) .addReg(NegSizeReg); }; - // Use FPReg to calculate CFA. - if (needsCFI) - buildDefCFA(PrologMBB, {MI}, FPReg, 0); - // For case HasBP && MaxAlign > 1, we have to align the SP by performing + // Used to probe realignment gap [stackptr - (stackptr % align), stackptr) + // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30 + // available and r1 is already copied to r30 which is BPReg. So BPReg stores + // the value of stackptr. + // First we have to probe tail interval whose size is less than probesize, + // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage, + // ScratchReg stores the value of ((stackptr % align) % probesize). Then we + // probe each block sized probesize until stackptr meets + // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized + // as negprobesize. At both stages, TempReg stores the value of + // (stackptr - (stackptr % align)). 
+ auto dynamicProbe = [&](MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, Register ScratchReg, + Register TempReg) { + assert(HasBP && isPPC64 && "Probe alignment part not available"); + assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2"); + // ScratchReg = stackptr % align + BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) + .addReg(BPReg) + .addImm(0) + .addImm(64 - Log2(MaxAlign)); + // TempReg = stackptr - (stackptr % align) + BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg) + .addReg(ScratchReg) + .addReg(BPReg); + // ScratchReg = (stackptr % align) % probesize + BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg) + .addReg(ScratchReg) + .addImm(0) + .addImm(64 - Log2(ProbeSize)); + Register CRReg = PPC::CR0; + // If (stackptr % align) % probesize == 0, we should not generate probe + // code. The layout of the output assembly looks like: + // bb.0: + // ... + // cmpdi $scratchreg, 0 + // beq bb.2 + // bb.1: # Probe tail interval + // neg $scratchreg, $scratchreg + // stdux $bpreg, r1, $scratchreg + // bb.2: + // (materialize negprobesize into $scratchreg) + // cmpd r1, $tempreg + // beq bb.4 + // bb.3: # Loop to probe each block + // stdux $bpreg, r1, $scratchreg + // cmpd r1, $tempreg + // bne bb.3 + // bb.4: + // ...
+ MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator()); + MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ProbeResidualMBB); + MachineBasicBlock *ProbeLoopPreHeaderMBB = + MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB); + MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ProbeLoopBodyMBB); + MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB); + MF.insert(MBBInsertPoint, ProbeExitMBB); + // bb.4 + ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end()); + ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB); + // bb.0 + BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0); + BuildMI(&MBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_EQ) + .addReg(CRReg) + .addMBB(ProbeLoopPreHeaderMBB); + MBB.addSuccessor(ProbeResidualMBB); + MBB.addSuccessor(ProbeLoopPreHeaderMBB); + // bb.1 + BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg) + .addReg(ScratchReg); + allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg, + false, BPReg); + ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB); + // bb.2 + MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(), + NegProbeSize, ScratchReg); + BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg) + .addReg(SPReg) + .addReg(TempReg); + BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_EQ) + .addReg(CRReg) + .addMBB(ProbeExitMBB); + ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB); + ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB); + // bb.3 + allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg, + false, BPReg); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg) + .addReg(SPReg) + .addReg(TempReg); + BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC)) + .addImm(PPC::PRED_NE) + .addReg(CRReg) + 
.addMBB(ProbeLoopBodyMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB); + ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB); + // Update liveins. + recomputeLiveIns(*ProbeResidualMBB); + recomputeLiveIns(*ProbeLoopPreHeaderMBB); + recomputeLiveIns(*ProbeLoopBodyMBB); + recomputeLiveIns(*ProbeExitMBB); + return ProbeExitMBB; + }; + // For case HasBP && MaxAlign > 1, we have to realign the SP by performing // SP = SP - SP % MaxAlign. if (HasBP && MaxAlign > 1) { - if (isPPC64) - BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) - .addReg(FPReg) - .addImm(0) - .addImm(64 - Log2(MaxAlign)); - else - BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) + // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in + // 64-bit mode. + if (isPPC64) { + // Use BPReg to calculate CFA. + if (needsCFI) + buildDefCFA(*CurrentMBB, {MI}, BPReg, 0); + // Since we have SPReg copied to BPReg at the moment, FPReg can be used as + // TempReg. + Register TempReg = FPReg; + CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg); + // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64. + BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) + .addReg(BPReg) + .addReg(BPReg); + } else { + // Initialize current frame pointer. + BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg) + .addReg(SPReg) + .addReg(SPReg); + // Use FPReg to calculate CFA. + if (needsCFI) + buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) .addReg(FPReg) .addImm(0) .addImm(32 - Log2(MaxAlign)) .addImm(31); - BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC), - SPReg) - .addReg(ScratchReg) - .addReg(SPReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg) + .addReg(ScratchReg) + .addReg(SPReg); + } + } else { + // Initialize current frame pointer. + BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); + // Use FPReg to calculate CFA. 
+ if (needsCFI) + buildDefCFA(*CurrentMBB, {MI}, FPReg, 0); } // Probe residual part. if (NegResidualSize) { bool ResidualUseDForm = CanUseDForm(NegResidualSize); if (!ResidualUseDForm) - MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg); - allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg, - ResidualUseDForm); + MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg); + allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg, + ResidualUseDForm, FPReg); } bool UseDForm = CanUseDForm(NegProbeSize); // If number of blocks is small, just probe them directly. if (NumBlocks < 3) { if (!UseDForm) - MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg); + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); for (int i = 0; i < NumBlocks; ++i) - allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm); + allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm, + FPReg); if (needsCFI) { // Restore using SPReg to calculate CFA. - buildDefCFAReg(PrologMBB, {MI}, SPReg); + buildDefCFAReg(*CurrentMBB, {MI}, SPReg); } } else { // Since CTR is a volatile register and current shrinkwrap implementation // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a // CTR loop to probe. // Calculate trip count and stores it in CTRReg. - MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg); - BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) + MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg); + BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) .addReg(ScratchReg, RegState::Kill); if (!UseDForm) - MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg); + MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg); // Create MBBs of the loop. 
MachineFunction::iterator MBBInsertPoint = - std::next(PrologMBB.getIterator()); + std::next(CurrentMBB->getIterator()); MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, LoopMBB); MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); MF.insert(MBBInsertPoint, ExitMBB); // Synthesize the loop body. allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, - UseDForm); + UseDForm, FPReg); BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) .addMBB(LoopMBB); LoopMBB->addSuccessor(ExitMBB); LoopMBB->addSuccessor(LoopMBB); // Synthesize the exit MBB. - ExitMBB->splice(ExitMBB->end(), &PrologMBB, + ExitMBB->splice(ExitMBB->end(), CurrentMBB, std::next(MachineBasicBlock::iterator(MI)), - PrologMBB.end()); - ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB); - PrologMBB.addSuccessor(LoopMBB); + CurrentMBB->end()); + ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB); + CurrentMBB->addSuccessor(LoopMBB); if (needsCFI) { // Restore using SPReg to calculate CFA. 
buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); diff --git a/llvm/test/CodeGen/PowerPC/pr46759.ll b/llvm/test/CodeGen/PowerPC/pr46759.ll --- a/llvm/test/CodeGen/PowerPC/pr46759.ll +++ b/llvm/test/CodeGen/PowerPC/pr46759.ll @@ -9,10 +9,26 @@ ; CHECK-LE-NEXT: std r31, -8(r1) ; CHECK-LE-NEXT: std r30, -16(r1) ; CHECK-LE-NEXT: mr r30, r1 -; CHECK-LE-NEXT: mr r12, r1 -; CHECK-LE-NEXT: .cfi_def_cfa r12, 0 -; CHECK-LE-NEXT: clrldi r0, r12, 53 -; CHECK-LE-NEXT: subc r1, r1, r0 +; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 +; CHECK-LE-NEXT: clrldi r0, r30, 53 +; CHECK-LE-NEXT: subc r12, r30, r0 +; CHECK-LE-NEXT: clrldi r0, r0, 52 +; CHECK-LE-NEXT: cmpdi r0, 0 +; CHECK-LE-NEXT: beq cr0, .LBB0_2 +; CHECK-LE-NEXT: # %bb.1: # %entry +; CHECK-LE-NEXT: neg r0, r0 +; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: .LBB0_2: # %entry +; CHECK-LE-NEXT: li r0, -4096 +; CHECK-LE-NEXT: cmpd r1, r12 +; CHECK-LE-NEXT: beq cr0, .LBB0_4 +; CHECK-LE-NEXT: .LBB0_3: # %entry +; CHECK-LE-NEXT: # +; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: cmpd r1, r12 +; CHECK-LE-NEXT: bne cr0, .LBB0_3 +; CHECK-LE-NEXT: .LBB0_4: # %entry +; CHECK-LE-NEXT: mr r12, r30 ; CHECK-LE-NEXT: stdu r12, -2048(r1) ; CHECK-LE-NEXT: stdu r12, -4096(r1) ; CHECK-LE-NEXT: .cfi_def_cfa_register r1 @@ -36,13 +52,13 @@ ; CHECK-LE-NEXT: add r4, r1, r4 ; CHECK-LE-NEXT: stdux r3, r1, r5 ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: beq cr0, .LBB0_2 -; CHECK-LE-NEXT: .LBB0_1: # %entry +; CHECK-LE-NEXT: beq cr0, .LBB0_6 +; CHECK-LE-NEXT: .LBB0_5: # %entry ; CHECK-LE-NEXT: # ; CHECK-LE-NEXT: stdu r3, -4096(r1) ; CHECK-LE-NEXT: cmpd r1, r4 -; CHECK-LE-NEXT: bne cr0, .LBB0_1 -; CHECK-LE-NEXT: .LBB0_2: # %entry +; CHECK-LE-NEXT: bne cr0, .LBB0_5 +; CHECK-LE-NEXT: .LBB0_6: # %entry ; CHECK-LE-NEXT: addi r3, r1, 2048 ; CHECK-LE-NEXT: lbz r3, 0(r3) ; CHECK-LE-NEXT: ld r1, 0(r1) diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll --- 
a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll @@ -528,4 +528,502 @@ ret i8 %c } +; alloca + align < probe_size +define i32 @f8(i64 %i) local_unnamed_addr #0 { +; CHECK-LE-LABEL: f8: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: clrldi r0, r1, 58 +; CHECK-LE-NEXT: std r30, -16(r1) +; CHECK-LE-NEXT: mr r30, r1 +; CHECK-LE-NEXT: subfic r0, r0, -896 +; CHECK-LE-NEXT: stdux r1, r1, r0 +; CHECK-LE-NEXT: .cfi_def_cfa_register r30 +; CHECK-LE-NEXT: .cfi_offset r30, -16 +; CHECK-LE-NEXT: addi r4, r1, 64 +; CHECK-LE-NEXT: sldi r3, r3, 2 +; CHECK-LE-NEXT: li r5, 1 +; CHECK-LE-NEXT: stwx r5, r4, r3 +; CHECK-LE-NEXT: lwz r3, 64(r1) +; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: ld r30, -16(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f8: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: clrldi r0, r1, 58 +; CHECK-BE-NEXT: std r30, -16(r1) +; CHECK-BE-NEXT: mr r30, r1 +; CHECK-BE-NEXT: subfic r0, r0, -896 +; CHECK-BE-NEXT: stdux r1, r1, r0 +; CHECK-BE-NEXT: .cfi_def_cfa_register r30 +; CHECK-BE-NEXT: .cfi_offset r30, -16 +; CHECK-BE-NEXT: addi r4, r1, 64 +; CHECK-BE-NEXT: li r5, 1 +; CHECK-BE-NEXT: sldi r3, r3, 2 +; CHECK-BE-NEXT: stwx r5, r4, r3 +; CHECK-BE-NEXT: lwz r3, 64(r1) +; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: ld r30, -16(r1) +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f8: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: clrlwi r0, r1, 26 +; CHECK-32-NEXT: subfic r0, r0, -896 +; CHECK-32-NEXT: stwux r1, r1, r0 +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: addic r0, r0, -8 +; CHECK-32-NEXT: stwx r30, 0, r0 +; CHECK-32-NEXT: addic r30, r0, 8 +; CHECK-32-NEXT: .cfi_def_cfa_register r30 +; CHECK-32-NEXT: .cfi_offset r30, -8 +; CHECK-32-NEXT: addi r3, r1, 64 +; CHECK-32-NEXT: li r5, 1 +; CHECK-32-NEXT: slwi r4, r4, 2 +; CHECK-32-NEXT: stwx r5, r3, r4 +; CHECK-32-NEXT: mr r0, r31 +; CHECK-32-NEXT: lwz r3, 64(r1) +; CHECK-32-NEXT: lwz r31, 0(r1) +; CHECK-32-NEXT: lwz r30, -8(r31) +; CHECK-32-NEXT: mr r1, r31 +; 
CHECK-32-NEXT: mr r31, r0 +; CHECK-32-NEXT: blr + %a = alloca i32, i32 200, align 64 + %b = getelementptr inbounds i32, i32* %a, i64 %i + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +; alloca > probe_size, align > probe_size +define i32 @f9(i64 %i) local_unnamed_addr #0 { +; CHECK-LE-LABEL: f9: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: std r30, -16(r1) +; CHECK-LE-NEXT: mr r30, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 +; CHECK-LE-NEXT: clrldi r0, r30, 53 +; CHECK-LE-NEXT: subc r12, r30, r0 +; CHECK-LE-NEXT: clrldi r0, r0, 52 +; CHECK-LE-NEXT: cmpdi r0, 0 +; CHECK-LE-NEXT: beq cr0, .LBB9_2 +; CHECK-LE-NEXT: # %bb.1: +; CHECK-LE-NEXT: neg r0, r0 +; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: .LBB9_2: +; CHECK-LE-NEXT: li r0, -4096 +; CHECK-LE-NEXT: cmpd r1, r12 +; CHECK-LE-NEXT: beq cr0, .LBB9_4 +; CHECK-LE-NEXT: .LBB9_3: +; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: cmpd r1, r12 +; CHECK-LE-NEXT: bne cr0, .LBB9_3 +; CHECK-LE-NEXT: .LBB9_4: +; CHECK-LE-NEXT: mr r12, r30 +; CHECK-LE-NEXT: stdu r12, -2048(r1) +; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_register r30 +; CHECK-LE-NEXT: .cfi_offset r30, -16 +; CHECK-LE-NEXT: addi r4, r1, 2048 +; CHECK-LE-NEXT: sldi r3, r3, 2 +; CHECK-LE-NEXT: li r5, 1 +; CHECK-LE-NEXT: stwx r5, r4, r3 +; CHECK-LE-NEXT: lwz r3, 2048(r1) +; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: ld r30, -16(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f9: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: std r30, -16(r1) +; CHECK-BE-NEXT: mr r30, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 +; CHECK-BE-NEXT: clrldi r0, r30, 53 +; CHECK-BE-NEXT: subc r12, r30, r0 +; CHECK-BE-NEXT: clrldi r0, r0, 52 +; CHECK-BE-NEXT: cmpdi r0, 0 +; CHECK-BE-NEXT: beq cr0, .LBB9_2 +; CHECK-BE-NEXT: # %bb.1: +; CHECK-BE-NEXT: neg r0, r0 +; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: .LBB9_2: +; CHECK-BE-NEXT: li r0, -4096 
+; CHECK-BE-NEXT: cmpd r1, r12 +; CHECK-BE-NEXT: beq cr0, .LBB9_4 +; CHECK-BE-NEXT: .LBB9_3: +; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: cmpd r1, r12 +; CHECK-BE-NEXT: bne cr0, .LBB9_3 +; CHECK-BE-NEXT: .LBB9_4: +; CHECK-BE-NEXT: mr r12, r30 +; CHECK-BE-NEXT: stdu r12, -2048(r1) +; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_register r30 +; CHECK-BE-NEXT: .cfi_offset r30, -16 +; CHECK-BE-NEXT: addi r4, r1, 2048 +; CHECK-BE-NEXT: li r5, 1 +; CHECK-BE-NEXT: sldi r3, r3, 2 +; CHECK-BE-NEXT: stwx r5, r4, r3 +; CHECK-BE-NEXT: lwz r3, 2048(r1) +; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: ld r30, -16(r1) +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f9: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: .cfi_def_cfa r12, 0 +; CHECK-32-NEXT: clrlwi r0, r12, 21 +; CHECK-32-NEXT: subc r1, r1, r0 +; CHECK-32-NEXT: stwu r12, -2048(r1) +; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: addic r0, r0, -8 +; CHECK-32-NEXT: stwx r30, 0, r0 +; CHECK-32-NEXT: addic r30, r0, 8 +; CHECK-32-NEXT: .cfi_def_cfa_register r30 +; CHECK-32-NEXT: .cfi_offset r30, -8 +; CHECK-32-NEXT: addi r3, r1, 2048 +; CHECK-32-NEXT: li r5, 1 +; CHECK-32-NEXT: slwi r4, r4, 2 +; CHECK-32-NEXT: stwx r5, r3, r4 +; CHECK-32-NEXT: mr r0, r31 +; CHECK-32-NEXT: lwz r3, 2048(r1) +; CHECK-32-NEXT: lwz r31, 0(r1) +; CHECK-32-NEXT: lwz r30, -8(r31) +; CHECK-32-NEXT: mr r1, r31 +; CHECK-32-NEXT: mr r31, r0 +; CHECK-32-NEXT: blr + %a = alloca i32, i32 2000, align 2048 + %b = getelementptr inbounds i32, i32* %a, i64 %i + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +; alloca < probe_size, align < probe_size, alloca + align > probe_size +define i32 @f10(i64 %i) local_unnamed_addr #0 { +; CHECK-LE-LABEL: 
f10: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: std r30, -16(r1) +; CHECK-LE-NEXT: mr r30, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 +; CHECK-LE-NEXT: clrldi r0, r30, 54 +; CHECK-LE-NEXT: subc r12, r30, r0 +; CHECK-LE-NEXT: clrldi r0, r0, 52 +; CHECK-LE-NEXT: cmpdi r0, 0 +; CHECK-LE-NEXT: beq cr0, .LBB10_2 +; CHECK-LE-NEXT: # %bb.1: +; CHECK-LE-NEXT: neg r0, r0 +; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: .LBB10_2: +; CHECK-LE-NEXT: li r0, -4096 +; CHECK-LE-NEXT: cmpd r1, r12 +; CHECK-LE-NEXT: beq cr0, .LBB10_4 +; CHECK-LE-NEXT: .LBB10_3: +; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: cmpd r1, r12 +; CHECK-LE-NEXT: bne cr0, .LBB10_3 +; CHECK-LE-NEXT: .LBB10_4: +; CHECK-LE-NEXT: mr r12, r30 +; CHECK-LE-NEXT: stdu r12, -1024(r1) +; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_register r30 +; CHECK-LE-NEXT: .cfi_offset r30, -16 +; CHECK-LE-NEXT: addi r4, r1, 1024 +; CHECK-LE-NEXT: sldi r3, r3, 2 +; CHECK-LE-NEXT: li r5, 1 +; CHECK-LE-NEXT: stwx r5, r4, r3 +; CHECK-LE-NEXT: lwz r3, 1024(r1) +; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: ld r30, -16(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f10: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: std r30, -16(r1) +; CHECK-BE-NEXT: mr r30, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 +; CHECK-BE-NEXT: clrldi r0, r30, 54 +; CHECK-BE-NEXT: subc r12, r30, r0 +; CHECK-BE-NEXT: clrldi r0, r0, 52 +; CHECK-BE-NEXT: cmpdi r0, 0 +; CHECK-BE-NEXT: beq cr0, .LBB10_2 +; CHECK-BE-NEXT: # %bb.1: +; CHECK-BE-NEXT: neg r0, r0 +; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: .LBB10_2: +; CHECK-BE-NEXT: li r0, -4096 +; CHECK-BE-NEXT: cmpd r1, r12 +; CHECK-BE-NEXT: beq cr0, .LBB10_4 +; CHECK-BE-NEXT: .LBB10_3: +; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: cmpd r1, r12 +; CHECK-BE-NEXT: bne cr0, .LBB10_3 +; CHECK-BE-NEXT: .LBB10_4: +; CHECK-BE-NEXT: mr r12, r30 +; CHECK-BE-NEXT: stdu r12, -1024(r1) +; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: 
.cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_register r30 +; CHECK-BE-NEXT: .cfi_offset r30, -16 +; CHECK-BE-NEXT: addi r4, r1, 1024 +; CHECK-BE-NEXT: li r5, 1 +; CHECK-BE-NEXT: sldi r3, r3, 2 +; CHECK-BE-NEXT: stwx r5, r4, r3 +; CHECK-BE-NEXT: lwz r3, 1024(r1) +; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: ld r30, -16(r1) +; CHECK-BE-NEXT: blr +; +; CHECK-32-LABEL: f10: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: .cfi_def_cfa r12, 0 +; CHECK-32-NEXT: clrlwi r0, r12, 22 +; CHECK-32-NEXT: subc r1, r1, r0 +; CHECK-32-NEXT: stwu r12, -1024(r1) +; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: addic r0, r0, -8 +; CHECK-32-NEXT: stwx r30, 0, r0 +; CHECK-32-NEXT: addic r30, r0, 8 +; CHECK-32-NEXT: .cfi_def_cfa_register r30 +; CHECK-32-NEXT: .cfi_offset r30, -8 +; CHECK-32-NEXT: addi r3, r1, 1024 +; CHECK-32-NEXT: li r5, 1 +; CHECK-32-NEXT: slwi r4, r4, 2 +; CHECK-32-NEXT: stwx r5, r3, r4 +; CHECK-32-NEXT: mr r0, r31 +; CHECK-32-NEXT: lwz r3, 1024(r1) +; CHECK-32-NEXT: lwz r31, 0(r1) +; CHECK-32-NEXT: lwz r30, -8(r31) +; CHECK-32-NEXT: mr r1, r31 +; CHECK-32-NEXT: mr r31, r0 +; CHECK-32-NEXT: blr + %a = alloca i32, i32 1000, align 1024 + %b = getelementptr inbounds i32, i32* %a, i64 %i + store volatile i32 1, i32* %b + %c = load volatile i32, i32* %a + ret i32 %c +} + +define void @f11(i32 %vla_size, i64 %i) #0 { +; CHECK-LE-LABEL: f11: +; CHECK-LE: # %bb.0: +; CHECK-LE-NEXT: std r31, -8(r1) +; CHECK-LE-NEXT: std r30, -16(r1) +; CHECK-LE-NEXT: mr r30, r1 +; CHECK-LE-NEXT: .cfi_def_cfa r30, 0 +; CHECK-LE-NEXT: clrldi r0, r30, 49 +; CHECK-LE-NEXT: subc r12, r30, r0 +; CHECK-LE-NEXT: clrldi r0, r0, 52 +; CHECK-LE-NEXT: cmpdi r0, 0 +; CHECK-LE-NEXT: beq cr0, .LBB11_2 +; CHECK-LE-NEXT: # %bb.1: +; CHECK-LE-NEXT: neg r0, r0 +; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: .LBB11_2: +; CHECK-LE-NEXT: li r0, -4096 +; 
CHECK-LE-NEXT: cmpd r1, r12 +; CHECK-LE-NEXT: beq cr0, .LBB11_4 +; CHECK-LE-NEXT: .LBB11_3: +; CHECK-LE-NEXT: stdux r30, r1, r0 +; CHECK-LE-NEXT: cmpd r1, r12 +; CHECK-LE-NEXT: bne cr0, .LBB11_3 +; CHECK-LE-NEXT: .LBB11_4: +; CHECK-LE-NEXT: mr r12, r30 +; CHECK-LE-NEXT: li r0, 24 +; CHECK-LE-NEXT: mtctr r0 +; CHECK-LE-NEXT: .LBB11_5: +; CHECK-LE-NEXT: stdu r12, -4096(r1) +; CHECK-LE-NEXT: bdnz .LBB11_5 +; CHECK-LE-NEXT: # %bb.6: +; CHECK-LE-NEXT: .cfi_def_cfa_register r1 +; CHECK-LE-NEXT: .cfi_def_cfa_register r30 +; CHECK-LE-NEXT: .cfi_offset r31, -8 +; CHECK-LE-NEXT: .cfi_offset r30, -16 +; CHECK-LE-NEXT: clrldi r3, r3, 32 +; CHECK-LE-NEXT: lis r5, 1 +; CHECK-LE-NEXT: mr r31, r1 +; CHECK-LE-NEXT: li r6, 1 +; CHECK-LE-NEXT: addi r3, r3, 15 +; CHECK-LE-NEXT: ori r5, r5, 0 +; CHECK-LE-NEXT: rldicl r3, r3, 60, 4 +; CHECK-LE-NEXT: sldi r4, r4, 2 +; CHECK-LE-NEXT: add r5, r31, r5 +; CHECK-LE-NEXT: rldicl r3, r3, 4, 31 +; CHECK-LE-NEXT: stwx r6, r5, r4 +; CHECK-LE-NEXT: li r4, -32768 +; CHECK-LE-NEXT: neg r7, r3 +; CHECK-LE-NEXT: ld r3, 0(r1) +; CHECK-LE-NEXT: and r4, r7, r4 +; CHECK-LE-NEXT: mr r7, r4 +; CHECK-LE-NEXT: li r4, -4096 +; CHECK-LE-NEXT: divd r5, r7, r4 +; CHECK-LE-NEXT: mulld r4, r5, r4 +; CHECK-LE-NEXT: sub r5, r7, r4 +; CHECK-LE-NEXT: add r4, r1, r7 +; CHECK-LE-NEXT: stdux r3, r1, r5 +; CHECK-LE-NEXT: cmpd r1, r4 +; CHECK-LE-NEXT: beq cr0, .LBB11_8 +; CHECK-LE-NEXT: .LBB11_7: +; CHECK-LE-NEXT: stdu r3, -4096(r1) +; CHECK-LE-NEXT: cmpd r1, r4 +; CHECK-LE-NEXT: bne cr0, .LBB11_7 +; CHECK-LE-NEXT: .LBB11_8: +; CHECK-LE-NEXT: addi r3, r1, -32768 +; CHECK-LE-NEXT: lbz r3, 0(r3) +; CHECK-LE-NEXT: ld r1, 0(r1) +; CHECK-LE-NEXT: ld r31, -8(r1) +; CHECK-LE-NEXT: ld r30, -16(r1) +; CHECK-LE-NEXT: blr +; +; CHECK-BE-LABEL: f11: +; CHECK-BE: # %bb.0: +; CHECK-BE-NEXT: std r31, -8(r1) +; CHECK-BE-NEXT: std r30, -16(r1) +; CHECK-BE-NEXT: mr r30, r1 +; CHECK-BE-NEXT: .cfi_def_cfa r30, 0 +; CHECK-BE-NEXT: clrldi r0, r30, 49 +; CHECK-BE-NEXT: subc r12, r30, r0 +; 
CHECK-BE-NEXT: clrldi r0, r0, 52 +; CHECK-BE-NEXT: cmpdi r0, 0 +; CHECK-BE-NEXT: beq cr0, .LBB11_2 +; CHECK-BE-NEXT: # %bb.1: +; CHECK-BE-NEXT: neg r0, r0 +; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: .LBB11_2: +; CHECK-BE-NEXT: li r0, -4096 +; CHECK-BE-NEXT: cmpd r1, r12 +; CHECK-BE-NEXT: beq cr0, .LBB11_4 +; CHECK-BE-NEXT: .LBB11_3: +; CHECK-BE-NEXT: stdux r30, r1, r0 +; CHECK-BE-NEXT: cmpd r1, r12 +; CHECK-BE-NEXT: bne cr0, .LBB11_3 +; CHECK-BE-NEXT: .LBB11_4: +; CHECK-BE-NEXT: mr r12, r30 +; CHECK-BE-NEXT: li r0, 24 +; CHECK-BE-NEXT: mtctr r0 +; CHECK-BE-NEXT: .LBB11_5: +; CHECK-BE-NEXT: stdu r12, -4096(r1) +; CHECK-BE-NEXT: bdnz .LBB11_5 +; CHECK-BE-NEXT: # %bb.6: +; CHECK-BE-NEXT: .cfi_def_cfa_register r1 +; CHECK-BE-NEXT: .cfi_def_cfa_register r30 +; CHECK-BE-NEXT: .cfi_offset r31, -8 +; CHECK-BE-NEXT: .cfi_offset r30, -16 +; CHECK-BE-NEXT: clrldi r3, r3, 32 +; CHECK-BE-NEXT: lis r5, 1 +; CHECK-BE-NEXT: addi r3, r3, 15 +; CHECK-BE-NEXT: mr r31, r1 +; CHECK-BE-NEXT: ori r5, r5, 0 +; CHECK-BE-NEXT: rldicl r3, r3, 60, 4 +; CHECK-BE-NEXT: add r5, r31, r5 +; CHECK-BE-NEXT: sldi r4, r4, 2 +; CHECK-BE-NEXT: li r6, 1 +; CHECK-BE-NEXT: rldicl r3, r3, 4, 31 +; CHECK-BE-NEXT: stwx r6, r5, r4 +; CHECK-BE-NEXT: neg r7, r3 +; CHECK-BE-NEXT: li r4, -32768 +; CHECK-BE-NEXT: and r4, r7, r4 +; CHECK-BE-NEXT: ld r3, 0(r1) +; CHECK-BE-NEXT: mr r7, r4 +; CHECK-BE-NEXT: li r4, -4096 +; CHECK-BE-NEXT: divd r5, r7, r4 +; CHECK-BE-NEXT: mulld r4, r5, r4 +; CHECK-BE-NEXT: sub r5, r7, r4 +; CHECK-BE-NEXT: add r4, r1, r7 +; CHECK-BE-NEXT: stdux r3, r1, r5 +; CHECK-BE-NEXT: cmpd r1, r4 +; CHECK-BE-NEXT: beq cr0, .LBB11_8 +; CHECK-BE-NEXT: .LBB11_7: +; CHECK-BE-NEXT: stdu r3, -4096(r1) +; CHECK-BE-NEXT: cmpd r1, r4 +; CHECK-BE-NEXT: bne cr0, .LBB11_7 +; CHECK-BE-NEXT: .LBB11_8: +; CHECK-BE-NEXT: addi r3, r1, -32768 +; CHECK-BE-NEXT: lbz r3, 0(r3) +; CHECK-BE-NEXT: ld r1, 0(r1) +; CHECK-BE-NEXT: ld r31, -8(r1) +; CHECK-BE-NEXT: ld r30, -16(r1) +; CHECK-BE-NEXT: blr +; +; 
CHECK-32-LABEL: f11: +; CHECK-32: # %bb.0: +; CHECK-32-NEXT: mr r12, r1 +; CHECK-32-NEXT: .cfi_def_cfa r12, 0 +; CHECK-32-NEXT: clrlwi r0, r12, 17 +; CHECK-32-NEXT: subc r1, r1, r0 +; CHECK-32-NEXT: li r0, 24 +; CHECK-32-NEXT: mtctr r0 +; CHECK-32-NEXT: .LBB11_1: +; CHECK-32-NEXT: stwu r12, -4096(r1) +; CHECK-32-NEXT: bdnz .LBB11_1 +; CHECK-32-NEXT: # %bb.2: +; CHECK-32-NEXT: .cfi_def_cfa_register r1 +; CHECK-32-NEXT: sub r0, r1, r12 +; CHECK-32-NEXT: sub r0, r1, r0 +; CHECK-32-NEXT: addic r0, r0, -4 +; CHECK-32-NEXT: stwx r31, 0, r0 +; CHECK-32-NEXT: addic r0, r0, -4 +; CHECK-32-NEXT: stwx r30, 0, r0 +; CHECK-32-NEXT: addic r30, r0, 8 +; CHECK-32-NEXT: .cfi_def_cfa_register r30 +; CHECK-32-NEXT: .cfi_offset r31, -4 +; CHECK-32-NEXT: .cfi_offset r30, -8 +; CHECK-32-NEXT: lis r4, 1 +; CHECK-32-NEXT: mr r31, r1 +; CHECK-32-NEXT: ori r4, r4, 0 +; CHECK-32-NEXT: addi r3, r3, 15 +; CHECK-32-NEXT: add r4, r31, r4 +; CHECK-32-NEXT: li r5, 1 +; CHECK-32-NEXT: slwi r6, r6, 2 +; CHECK-32-NEXT: rlwinm r3, r3, 0, 0, 27 +; CHECK-32-NEXT: neg r7, r3 +; CHECK-32-NEXT: stwx r5, r4, r6 +; CHECK-32-NEXT: li r4, -32768 +; CHECK-32-NEXT: and r4, r7, r4 +; CHECK-32-NEXT: lwz r3, 0(r1) +; CHECK-32-NEXT: mr r7, r4 +; CHECK-32-NEXT: li r4, -4096 +; CHECK-32-NEXT: divw r5, r7, r4 +; CHECK-32-NEXT: mullw r4, r5, r4 +; CHECK-32-NEXT: sub r5, r7, r4 +; CHECK-32-NEXT: add r4, r1, r7 +; CHECK-32-NEXT: stwux r3, r1, r5 +; CHECK-32-NEXT: cmpw r1, r4 +; CHECK-32-NEXT: beq cr0, .LBB11_4 +; CHECK-32-NEXT: .LBB11_3: +; CHECK-32-NEXT: stwu r3, -4096(r1) +; CHECK-32-NEXT: cmpw r1, r4 +; CHECK-32-NEXT: bne cr0, .LBB11_3 +; CHECK-32-NEXT: .LBB11_4: +; CHECK-32-NEXT: addi r3, r1, -32768 +; CHECK-32-NEXT: lbz r3, 0(r3) +; CHECK-32-NEXT: lwz r31, 0(r1) +; CHECK-32-NEXT: lwz r0, -4(r31) +; CHECK-32-NEXT: lwz r30, -8(r31) +; CHECK-32-NEXT: mr r1, r31 +; CHECK-32-NEXT: mr r31, r0 +; CHECK-32-NEXT: blr + %a = alloca i32, i32 4096, align 32768 + %b = getelementptr inbounds i32, i32* %a, i64 %i + store volatile 
i32 1, i32* %b + %1 = zext i32 %vla_size to i64 + %vla = alloca i8, i64 %1, align 2048 + %2 = load volatile i8, i8* %vla, align 2048 + ret void +} + attributes #0 = { "probe-stack"="inline-asm" }