@@ -431,10 +431,257 @@ static bool usesTheStack(const MachineFunction &MF) {
431
431
return false ;
432
432
}
433
433
434
- void X86FrameLowering::emitStackProbeCall (MachineFunction &MF,
435
- MachineBasicBlock &MBB,
436
- MachineBasicBlock::iterator MBBI,
437
- DebugLoc DL) const {
434
+ MachineInstr *X86FrameLowering::emitStackProbe (MachineFunction &MF,
435
+ MachineBasicBlock &MBB,
436
+ MachineBasicBlock::iterator MBBI,
437
+ DebugLoc DL,
438
+ bool InProlog) const {
439
+ const X86Subtarget &STI = MF.getSubtarget <X86Subtarget>();
440
+ if (STI.isTargetWindowsCoreCLR ()) {
441
+ if (InProlog) {
442
+ return emitStackProbeInlineStub (MF, MBB, MBBI, DL, true );
443
+ } else {
444
+ return emitStackProbeInline (MF, MBB, MBBI, DL, false );
445
+ }
446
+ } else {
447
+ return emitStackProbeCall (MF, MBB, MBBI, DL, InProlog);
448
+ }
449
+ }
450
+
451
// Replace the __chkstk_stub placeholder call (planted by
// emitStackProbeInlineStub during prolog emission) with the real inline
// stack-probe sequence, then delete the stub call.
void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
                                        MachineBasicBlock &PrologMBB) const {
  const StringRef ChkStkStubSymbol = "__chkstk_stub";
  MachineInstr *ChkStkStub = nullptr;

  // Scan the prolog block for a call whose first operand is the stub symbol.
  for (MachineInstr &MI : PrologMBB) {
    if (MI.isCall() && MI.getOperand(0).isSymbol() &&
        ChkStkStubSymbol == MI.getOperand(0).getSymbolName()) {
      ChkStkStub = &MI;
      break;
    }
  }

  if (ChkStkStub != nullptr) {
    // Expand at the point just after the stub; the stub itself is erased
    // only after expansion so the insertion iterator stays valid.
    MachineBasicBlock::iterator MBBI = std::next(ChkStkStub->getIterator());
    assert(std::prev(MBBI).operator==(ChkStkStub) &&
           "MBBI expected after __chkstk_stub.");
    DebugLoc DL = PrologMBB.findDebugLoc(MBBI);
    emitStackProbeInline(MF, PrologMBB, MBBI, DL, true);
    ChkStkStub->eraseFromParent();
  }
}
473
+
474
// Emit the Windows CoreCLR inline stack-probe sequence (64-bit only).
// Splits MBB at MBBI and inserts the probe CFG (overflow check, page
// rounding, probe loop) between the two halves. Returns an iterator to the
// first instruction after the expansion in the continuation block.
MachineInstr *X86FrameLowering::emitStackProbeInline(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  assert(STI.is64Bit() && "different expansion needed for 32 bit");
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks.
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  // BeforeMBBI marks the last pre-existing instruction so that, in the
  // prolog case, everything inserted after it can be flagged FrameSetup.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants.
  // NOTE(review): 0x10 looks like the TEB StackLimit field offset on
  // Windows x64 — confirm against the TEB layout if this is ever changed.
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  // In the prolog several logical values deliberately share RDX/RCX since
  // their live ranges do not overlap.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const unsigned
      SizeReg = InProlog ? X86::RAX : MRI.createVirtualRegister(RegClass),
      ZeroReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      CopyReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      TestReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      FinalReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      RoundedReg = InProlog ? X86::RDX : MRI.createVirtualRegister(RegClass),
      LimitReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      JoinReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass),
      ProbeReg = InProlog ? X86::RCX : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  // Future optimization: don't save or restore if not live in.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);
    RCXShadowSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    RDXShadowSlot = RCXShadowSlot + 8;
    // Emit the saves.
    addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                 RCXShadowSlot)
        .addReg(X86::RCX);
    addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                 RDXShadowSlot)
        .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  // CMOVB: keep TestReg unless the subtraction borrowed (RSP - Size
  // wrapped), in which case clamp the target to zero.
  BuildMI(&MBB, DL, TII.get(X86::CMOVB64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JAE_1)).addMBB(ContinueMBB);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    // Virtual-register form needs an explicit PHI to join LimitReg (first
    // iteration) with ProbeReg (subsequent iterations). In the prolog case
    // JoinReg/LimitReg/ProbeReg are all RCX, so no PHI is required.
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JNE_1)).addMBB(LoopMBB);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
                         X86::RCX),
                 X86::RSP, false, RCXShadowSlot);
    addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::MOV64rm),
                         X86::RDX),
                 X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineBasicBlock::iterator CMBBI = ContinueMBB->begin();
         CMBBI != ContinueMBBI; ++CMBBI) {
      CMBBI->setFlag(MachineInstr::FrameSetup);
    }
  }

  // Possible TODO: physreg liveness for InProlog case.

  return ContinueMBBI;
}
681
+
682
+ MachineInstr *X86FrameLowering::emitStackProbeCall (
683
+ MachineFunction &MF, MachineBasicBlock &MBB,
684
+ MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const {
438
685
bool IsLargeCodeModel = MF.getTarget ().getCodeModel () == CodeModel::Large;
439
686
440
687
unsigned CallOp;
@@ -456,6 +703,7 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
456
703
Symbol = " _chkstk" ;
457
704
458
705
MachineInstrBuilder CI;
706
+ MachineBasicBlock::iterator ExpansionMBBI = std::prev (MBBI);
459
707
460
708
// All current stack probes take AX and SP as input, clobber flags, and
461
709
// preserve all registers. x86_64 probes leave RSP unmodified.
@@ -485,6 +733,26 @@ void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
485
733
.addReg (X86::RSP)
486
734
.addReg (X86::RAX);
487
735
}
736
+
737
+ if (InProlog) {
738
+ // Apply the frame setup flag to all inserted instrs.
739
+ for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
740
+ ExpansionMBBI->setFlag (MachineInstr::FrameSetup);
741
+ }
742
+
743
+ return MBBI;
744
+ }
745
+
746
+ MachineInstr *X86FrameLowering::emitStackProbeInlineStub (
747
+ MachineFunction &MF, MachineBasicBlock &MBB,
748
+ MachineBasicBlock::iterator MBBI, DebugLoc DL, bool InProlog) const {
749
+
750
+ assert (InProlog && " ChkStkStub called outside prolog!" );
751
+
752
+ MachineInstrBuilder CI = BuildMI (MBB, MBBI, DL, TII.get (X86::CALLpcrel32))
753
+ .addExternalSymbol (" __chkstk_stub" );
754
+
755
+ return MBBI;
488
756
}
489
757
490
758
static unsigned calculateSetFPREG (uint64_t SPAdjust) {
@@ -893,26 +1161,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
893
1161
// Allocate NumBytes-4 bytes on stack in case of isEAXAlive.
894
1162
// We'll also use 4 already allocated bytes for EAX.
895
1163
BuildMI (MBB, MBBI, DL, TII.get (X86::MOV32ri), X86::EAX)
896
- .addImm (isEAXAlive ? NumBytes - 4 : NumBytes)
897
- .setMIFlag (MachineInstr::FrameSetup);
1164
+ .addImm (isEAXAlive ? NumBytes - 4 : NumBytes)
1165
+ .setMIFlag (MachineInstr::FrameSetup);
898
1166
}
899
1167
900
- // Save a pointer to the MI where we set AX.
901
- MachineBasicBlock::iterator SetRAX = MBBI;
902
- --SetRAX;
903
-
904
1168
// Call __chkstk, __chkstk_ms, or __alloca.
905
- emitStackProbeCall (MF, MBB, MBBI, DL);
906
-
907
- // Apply the frame setup flag to all inserted instrs.
908
- for (; SetRAX != MBBI; ++SetRAX)
909
- SetRAX->setFlag (MachineInstr::FrameSetup);
1169
+ emitStackProbe (MF, MBB, MBBI, DL, true );
910
1170
911
1171
if (isEAXAlive) {
912
1172
// Restore EAX
913
- MachineInstr *MI = addRegOffset ( BuildMI (MF, DL, TII. get (X86::MOV32rm),
914
- X86::EAX),
915
- StackPtr, false , NumBytes - 4 );
1173
+ MachineInstr *MI =
1174
+ addRegOffset ( BuildMI (MF, DL, TII. get (X86::MOV32rm), X86::EAX),
1175
+ StackPtr, false , NumBytes - 4 );
916
1176
MI->setFlag (MachineInstr::FrameSetup);
917
1177
MBB.insert (MBBI, MI);
918
1178
}
0 commit comments