Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -675,21 +675,44 @@ int FrameIdx; int Offset; bool IsGPR; + bool isPaired() const { return Reg2 != AArch64::NoRegister; } }; -static void -computeCalleeSaveRegisterPairs(const std::vector &CSI, - const TargetRegisterInfo *TRI, - SmallVectorImpl &RegPairs) { +static void computeCalleeSaveRegisterPairs( + MachineFunction &MF, const std::vector &CSI, + const TargetRegisterInfo *TRI, SmallVectorImpl &RegPairs) { - unsigned Count = CSI.size(); - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); + if (CSI.empty()) + return; - for (unsigned i = 0; i < Count; i += 2) { - unsigned idx = Count - i - 2; + AArch64FunctionInfo *AFI = MF.getInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + unsigned Count = CSI.size(); + // MachO's compact unwind format relies on all registers being stored in + // pairs. + assert((!MF.getSubtarget().isTargetMachO() || + (Count & 1) == 0) && + "Odd number of callee-saved regs to spill!"); + unsigned Offset = AFI->getCalleeSavedStackSize(); + + for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; - RPI.Reg1 = CSI[idx].getReg(); - RPI.Reg2 = CSI[idx + 1].getReg(); + RPI.Reg1 = CSI[i].getReg(); + + if (AArch64::GPR64RegClass.contains(RPI.Reg1)) + RPI.IsGPR = true; + else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) + RPI.IsGPR = false; + else + llvm_unreachable("Unexpected callee saved register!"); + + // Add the next reg to the pair if it is in the same register class. + if (i + 1 < Count) { + unsigned NextReg = CSI[i + 1].getReg(); + if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || + (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) + RPI.Reg2 = NextReg; + } // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store @@ -697,26 +720,45 @@ // // The order of the registers in the list is controlled by // getCalleeSavedRegs(), so they will always be in-order, as well. - assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() && + assert((!RPI.isPaired() || + (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && "Out of order callee saved regs!"); - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); - RPI.FrameIdx = CSI[idx + 1].getFrameIdx(); - if (AArch64::GPR64RegClass.contains(RPI.Reg1)) - RPI.IsGPR = true; - else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) - RPI.IsGPR = false; - else - llvm_unreachable("Unexpected callee saved register!"); - // Compute offset: i = 0 => offset = Count; - // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc. - RPI.Offset = (i == 0) ? Count : i; + // MachO's compact unwind format relies on all registers being stored in + // adjacent register pairs. + assert((!MF.getSubtarget().isTargetMachO() || + (RPI.isPaired() && + ((RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) || + RPI.Reg1 + 1 == RPI.Reg2))) && + "Callee-save registers not saved as adjacent register pair!"); + + RPI.FrameIdx = CSI[i].getFrameIdx(); + + if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) { + // Round up size of non-pair to pair size if we need to pad the + // callee-save area to ensure 16-byte alignment. + Offset -= 16; + assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16); + MFI->setObjectSize(RPI.FrameIdx, 16); + } else + Offset -= RPI.isPaired() ? 16 : 8; + assert(Offset % 8 == 0); + RPI.Offset = Offset / 8; assert((RPI.Offset >= -64 && RPI.Offset <= 63) && "Offset out of bounds for LDP/STP immediate"); RegPairs.push_back(RPI); + if (RPI.isPaired()) + ++i; } + + // Align first offset to even 16-byte boundary to avoid additional SP + // adjustment instructions. + // Last pair offset is size of whole callee-save region for SP + // pre-dec/post-inc. + RegPairInfo &LastPair = RegPairs.back(); + assert(AFI->getCalleeSavedStackSize() % 8 == 0); + LastPair.Offset = AFI->getCalleeSavedStackSize() / 8; } bool AArch64FrameLowering::spillCalleeSavedRegisters( @@ -728,9 +770,9 @@ DebugLoc DL; SmallVector RegPairs; - computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs); + computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); - for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE; + for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; ++RPII) { RegPairInfo RPI = *RPII; unsigned Reg1 = RPI.Reg1; @@ -746,36 +788,48 @@ // Rationale: This sequence saves uop updates compared to a sequence of // pre-increment spills like stp xi,xj,[sp,#-16]! // Note: Similar rationale and sequence for restores in epilog. - bool BumpSP = RPII == RegPairs.begin(); + bool BumpSP = RPII == RegPairs.rbegin(); if (RPI.IsGPR) { // For first spill use pre-increment store. if (BumpSP) - StrOpc = AArch64::STPXpre; + StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre; else - StrOpc = AArch64::STPXi; + StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; } else { // For first spill use pre-increment store. if (BumpSP) - StrOpc = AArch64::STPDpre; + StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre; else - StrOpc = AArch64::STPDi; + StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; } - DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", " - << TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx - << ", " << RPI.FrameIdx+1 << ")\n"); + DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1); + if (RPI.isPaired()) + dbgs() << ", " << TRI->getName(Reg2); + dbgs() << ") -> fi#(" << RPI.FrameIdx; + if (RPI.isPaired()) + dbgs() << ", " << RPI.FrameIdx+1; + dbgs() << ")\n"); const int Offset = BumpSP ? -RPI.Offset : RPI.Offset; MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); if (BumpSP) MIB.addReg(AArch64::SP, RegState::Define); - MBB.addLiveIn(Reg1); - MBB.addLiveIn(Reg2); - MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) + if (RPI.isPaired()) { + MBB.addLiveIn(Reg1); + MBB.addLiveIn(Reg2); + MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit .setMIFlag(MachineInstr::FrameSetup); + } else { + MBB.addLiveIn(Reg1); + MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) + .addReg(AArch64::SP) + .addImm(BumpSP ? Offset * 8 : Offset) // pre-inc version is unscaled + .setMIFlag(MachineInstr::FrameSetup); + } } return true; } @@ -792,9 +846,9 @@ if (MI != MBB.end()) DL = MI->getDebugLoc(); - computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs); + computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); - for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; + for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE; ++RPII) { RegPairInfo RPI = *RPII; unsigned Reg1 = RPI.Reg1; @@ -808,33 +862,43 @@ // ldp x22, x21, [sp], #48 // addImm(+6) // Note: see comment in spillCalleeSavedRegisters() unsigned LdrOpc; - bool BumpSP = RPII == std::prev(RegPairs.rend()); + bool BumpSP = RPII == std::prev(RegPairs.end()); if (RPI.IsGPR) { if (BumpSP) - LdrOpc = AArch64::LDPXpost; + LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost; else - LdrOpc = AArch64::LDPXi; + LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; } else { if (BumpSP) - LdrOpc = AArch64::LDPDpost; + LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost; else - LdrOpc = AArch64::LDPDi; + LdrOpc = RPI.isPaired() ? AArch64::LDPDi : AArch64::LDRDui; } - DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", " - << TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx - << ", " << RPI.FrameIdx+1 << ")\n"); + DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1); + if (RPI.isPaired()) + dbgs() << ", " << TRI->getName(Reg2); + dbgs() << ") -> fi#(" << RPI.FrameIdx; + if (RPI.isPaired()) + dbgs() << ", " << RPI.FrameIdx+1; + dbgs() << ")\n"); const int Offset = RPI.Offset; MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); if (BumpSP) MIB.addReg(AArch64::SP, RegState::Define); - MIB.addReg(Reg2, getDefRegState(true)) + if (RPI.isPaired()) + MIB.addReg(Reg2, getDefRegState(true)) .addReg(Reg1, getDefRegState(true)) .addReg(AArch64::SP) .addImm(Offset) // [sp], #offset * 8 or [sp, #offset * 8] // where the factor * 8 is implicit .setMIFlag(MachineInstr::FrameDestroy); + else + MIB.addReg(Reg1, getDefRegState(true)) + .addReg(AArch64::SP) + .addImm(BumpSP ? Offset * 8 : Offset) // post-dec version is unscaled + .setMIFlag(MachineInstr::FrameDestroy); } return true; } @@ -851,8 +915,9 @@ const AArch64RegisterInfo *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); AArch64FunctionInfo *AFI = MF.getInfo(); - SmallVector UnspilledCSGPRs; - SmallVector UnspilledCSFPRs; + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + unsigned UnspilledCSGPR = AArch64::NoRegister; + unsigned UnspilledCSGPRPaired = AArch64::NoRegister; // The frame record needs to be created by saving the appropriate registers if (hasFP(MF)) { @@ -860,80 +925,58 @@ SavedRegs.set(AArch64::LR); } - // Spill the BasePtr if it's used. Do this first thing so that the - // getCalleeSavedRegs() below will get the right answer. + unsigned BasePointerReg = AArch64::NoRegister; if (RegInfo->hasBasePointer(MF)) - SavedRegs.set(RegInfo->getBaseRegister()); + BasePointerReg = RegInfo->getBaseRegister(); + unsigned StackAlignReg = AArch64::NoRegister; if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF)) - SavedRegs.set(AArch64::X9); + StackAlignReg = AArch64::X9; - // If any callee-saved registers are used, the frame cannot be eliminated. - unsigned NumGPRSpilled = 0; - unsigned NumFPRSpilled = 0; bool ExtraCSSpill = false; - bool CanEliminateFrame = true; - DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); + // Figure out which callee-saved registers to save/restore. + for (unsigned i = 0; CSRegs[i]; ++i) { + const unsigned Reg = CSRegs[i]; - // Check pairs of consecutive callee-saved registers. - for (unsigned i = 0; CSRegs[i]; i += 2) { - assert(CSRegs[i + 1] && "Odd number of callee-saved registers!"); - - const unsigned OddReg = CSRegs[i]; - const unsigned EvenReg = CSRegs[i + 1]; - assert((AArch64::GPR64RegClass.contains(OddReg) && - AArch64::GPR64RegClass.contains(EvenReg)) ^ - (AArch64::FPR64RegClass.contains(OddReg) && - AArch64::FPR64RegClass.contains(EvenReg)) && - "Register class mismatch!"); - - const bool OddRegUsed = SavedRegs.test(OddReg); - const bool EvenRegUsed = SavedRegs.test(EvenReg); - - // Early exit if none of the registers in the register pair is actually - // used. - if (!OddRegUsed && !EvenRegUsed) { - if (AArch64::GPR64RegClass.contains(OddReg)) { - UnspilledCSGPRs.push_back(OddReg); - UnspilledCSGPRs.push_back(EvenReg); - } else { - UnspilledCSFPRs.push_back(OddReg); - UnspilledCSFPRs.push_back(EvenReg); + // Add the stack re-align scratch register and base pointer register to + // SavedRegs set only if they are callee-save. + if (Reg == BasePointerReg || Reg == StackAlignReg) + SavedRegs.set(Reg); + + bool RegUsed = SavedRegs.test(Reg); + unsigned PairedReg = CSRegs[i ^ 1]; + if (!RegUsed) { + if (AArch64::GPR64RegClass.contains(Reg) && + !RegInfo->isReservedReg(MF, Reg)) { + UnspilledCSGPR = Reg; + // MachO's compact unwind format relies on all registers being stored in + // pairs, so if we need to spill one extra for BigStack, then we need to + // store the pair. + if (Subtarget.isTargetMachO()) + UnspilledCSGPRPaired = PairedReg; } continue; } - unsigned Reg = AArch64::NoRegister; - // If only one of the registers of the register pair is used, make sure to - // mark the other one as used as well. - if (OddRegUsed ^ EvenRegUsed) { - // Find out which register is the additional spill. - Reg = OddRegUsed ? EvenReg : OddReg; - SavedRegs.set(Reg); + // MachO's compact unwind format relies on all registers being stored in + // pairs. + // FIXME: the usual format is actually better if unwinding isn't needed. + if (Subtarget.isTargetMachO() && !SavedRegs.test(PairedReg)) { + SavedRegs.set(PairedReg); + ExtraCSSpill = true; } + } - DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); - DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo)); - - assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) || - (RegInfo->getEncodingValue(OddReg) + 1 == - RegInfo->getEncodingValue(EvenReg))) && - "Register pair of non-adjacent registers!"); - if (AArch64::GPR64RegClass.contains(OddReg)) { - NumGPRSpilled += 2; - // If it's not a reserved register, we can use it in lieu of an - // emergency spill slot for the register scavenger. - // FIXME: It would be better to instead keep looking and choose another - // unspilled register that isn't reserved, if there is one. - if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) - ExtraCSSpill = true; - } else - NumFPRSpilled += 2; + DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"; + for (int Reg = SavedRegs.find_first(); Reg != -1; + Reg = SavedRegs.find_next(Reg)) + dbgs() << ' ' << PrintReg(Reg, RegInfo); + dbgs() << "\n";); - CanEliminateFrame = false; - } - DEBUG(dbgs() << "\n"); + // If any callee-saved registers are used, the frame cannot be eliminated. + unsigned NumRegsSpilled = SavedRegs.count(); + bool CanEliminateFrame = NumRegsSpilled == 0; // FIXME: Set BigStack if any stack slot references may be out of range. // For now, just conservatively guestimate based on unscaled indexing @@ -942,8 +985,7 @@ // The CSR spill slots have not been allocated yet, so estimateStackSize // won't include them. MachineFrameInfo *MFI = MF.getFrameInfo(); - unsigned CFSize = - MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled); + unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled; DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n"); bool BigStack = (CFSize >= 256); if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) @@ -956,20 +998,16 @@ // above to keep the number of spills even, we don't need to do anything else // here. if (BigStack && !ExtraCSSpill) { - - // If we're adding a register to spill here, we have to add two of them - // to keep the number of regs to spill even. - assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!"); - unsigned Count = 0; - while (!UnspilledCSGPRs.empty() && Count < 2) { - unsigned Reg = UnspilledCSGPRs.back(); - UnspilledCSGPRs.pop_back(); - DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo) - << " to get a scratch register.\n"); - SavedRegs.set(Reg); + if (UnspilledCSGPR != AArch64::NoRegister) { + DEBUG(dbgs() << "Spilling " << PrintReg(UnspilledCSGPR, RegInfo) + << " to get a scratch register.\n"); + SavedRegs.set(UnspilledCSGPR); + ++NumRegsSpilled; + if (UnspilledCSGPRPaired != AArch64::NoRegister) { + SavedRegs.set(UnspilledCSGPRPaired); + ++NumRegsSpilled; + } ExtraCSSpill = true; - ++Count; - ++NumGPRSpilled; } // If we didn't find an extra callee-saved register to spill, create @@ -983,5 +1021,7 @@ } } - AFI->setCalleeSavedStackSize(8 * (NumGPRSpilled + NumFPRSpilled)); + // Round up to register pair alignment to avoid additional SP adjustment + // instructions. + AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16)); } Index: test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll =================================================================== --- test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll +++ test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll @@ -1,4 +1,5 @@ ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO ; This test aims to check basic correctness of frame layout & ; frame access code. There are 8 functions in this test file, @@ -97,7 +98,7 @@ ; CHECK-LABEL: novla_nodynamicrealign_call ; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: str x19, [sp, #-32]! ; Check that the frame pointer is created: ; CHECK: stp x29, x30, [sp, #16] ; CHECK: add x29, sp, #16 @@ -105,8 +106,7 @@ ; CHECK: .cfi_def_cfa w29, 16 ; CHECK: .cfi_offset w30, -8 ; CHECK: .cfi_offset w29, -16 -; CHECK: .cfi_offset w19, -24 -; CHECK: .cfi_offset w20, -32 +; CHECK: .cfi_offset w19, -32 ; Check correct access to arguments passed on the stack, through frame pointer ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] @@ -114,10 +114,34 @@ ; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12] ; Check epilogue: ; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x20, x19, [sp], #32 +; CHECK: ldr x19, [sp], #32 ; CHECK: ret ; CHECK: .cfi_endproc +; CHECK-MACHO-LABEL: _novla_nodynamicrealign_call: +; CHECK-MACHO: .cfi_startproc +; Check that used callee-saved registers are saved +; CHECK-MACHO: stp x20, x19, [sp, #-32]! +; Check that the frame pointer is created: +; CHECK-MACHO: stp x29, x30, [sp, #16] +; CHECK-MACHO: add x29, sp, #16 +; Check correctness of cfi pseudo-instructions +; CHECK-MACHO: .cfi_def_cfa w29, 16 +; CHECK-MACHO: .cfi_offset w30, -8 +; CHECK-MACHO: .cfi_offset w29, -16 +; CHECK-MACHO: .cfi_offset w19, -24 +; CHECK-MACHO: .cfi_offset w20, -32 +; Check correct access to arguments passed on the stack, through frame pointer +; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] +; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] +; Check correct access to local variable on the stack, through stack pointer +; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp, #12] +; Check epilogue: +; CHECK-MACHO: ldp x29, x30, [sp, #16] +; CHECK-MACHO: ldp x20, x19, [sp], #32 +; CHECK-MACHO: ret +; CHECK-MACHO: .cfi_endproc + declare i32 @g() #0 @@ -159,7 +183,7 @@ ; CHECK-LABEL: novla_dynamicrealign_call ; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: str x19, [sp, #-32]! ; Check that the frame pointer is created: ; CHECK: stp x29, x30, [sp, #16] ; CHECK: add x29, sp, #16 @@ -170,8 +194,7 @@ ; CHECK: .cfi_def_cfa w29, 16 ; CHECK: .cfi_offset w30, -8 ; CHECK: .cfi_offset w29, -16 -; CHECK: .cfi_offset w19, -24 -; CHECK: .cfi_offset w20, -32 +; CHECK: .cfi_offset w19, -32 ; Check correct access to arguments passed on the stack, through frame pointer ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] @@ -181,10 +204,39 @@ ; Check that stack pointer get restored from frame pointer. ; CHECK: sub sp, x29, #16 // =16 ; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x20, x19, [sp], #32 +; CHECK: ldr x19, [sp], #32 ; CHECK: ret ; CHECK: .cfi_endproc +; CHECK-MACHO-LABEL: _novla_dynamicrealign_call: +; CHECK-MACHO: .cfi_startproc +; Check that used callee-saved registers are saved +; CHECK-MACHO: stp x20, x19, [sp, #-32]! +; Check that the frame pointer is created: +; CHECK-MACHO: stp x29, x30, [sp, #16] +; CHECK-MACHO: add x29, sp, #16 +; Check the dynamic realignment of the stack pointer to a 128-byte boundary +; CHECK-MACHO: sub x9, sp, #96 +; CHECK-MACHO: and sp, x9, #0xffffffffffffff80 +; Check correctness of cfi pseudo-instructions +; CHECK-MACHO: .cfi_def_cfa w29, 16 +; CHECK-MACHO: .cfi_offset w30, -8 +; CHECK-MACHO: .cfi_offset w29, -16 +; CHECK-MACHO: .cfi_offset w19, -24 +; CHECK-MACHO: .cfi_offset w20, -32 +; Check correct access to arguments passed on the stack, through frame pointer +; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] +; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] +; Check correct access to local variable on the stack, through re-aligned stack pointer +; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp] +; Check epilogue: +; Check that stack pointer get restored from frame pointer. +; CHECK-MACHO: sub sp, x29, #16 +; CHECK-MACHO: ldp x29, x30, [sp, #16] +; CHECK-MACHO: ldp x20, x19, [sp], #32 +; CHECK-MACHO: ret +; CHECK-MACHO: .cfi_endproc + ; Function Attrs: nounwind define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 { @@ -336,7 +388,7 @@ ; CHECK-LABEL: vla_dynamicrealign_call ; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: stp x22, x21, [sp, #-48]! +; CHECK: str x21, [sp, #-48]! ; CHECK: stp x20, x19, [sp, #16] ; Check that the frame pointer is created: ; CHECK: stp x29, x30, [sp, #32] @@ -354,8 +406,7 @@ ; CHECK: .cfi_offset w29, -16 ; CHECK: .cfi_offset w19, -24 ; CHECK: .cfi_offset w20, -32 -; CHECK: .cfi_offset w21, -40 -; CHECK: .cfi_offset w22, -48 +; CHECK: .cfi_offset w21, -48 ; Check correct access to arguments passed on the stack, through frame pointer ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] @@ -376,10 +427,57 @@ ; CHECK: sub sp, x29, #32 ; CHECK: ldp x29, x30, [sp, #32] ; CHECK: ldp x20, x19, [sp, #16] -; CHECK: ldp x22, x21, [sp], #48 +; CHECK: ldr x21, [sp], #48 ; CHECK: ret ; CHECK: .cfi_endproc +; CHECK-MACHO-LABEL: _vla_dynamicrealign_call: +; CHECK-MACHO: .cfi_startproc +; Check that used callee-saved registers are saved +; CHECK-MACHO: stp x22, x21, [sp, #-48]! +; CHECK-MACHO: stp x20, x19, [sp, #16] +; Check that the frame pointer is created: +; CHECK-MACHO: stp x29, x30, [sp, #32] +; CHECK-MACHO: add x29, sp, #32 +; Check that the stack pointer gets re-aligned to 128 +; bytes & the base pointer (x19) gets initialized to +; this 128-byte aligned area for local variables & +; spill slots +; CHECK-MACHO: sub x9, sp, #80 +; CHECK-MACHO: and sp, x9, #0xffffffffffffff80 +; CHECK-MACHO: mov x19, sp +; Check correctness of cfi pseudo-instructions +; CHECK-MACHO: .cfi_def_cfa w29, 16 +; CHECK-MACHO: .cfi_offset w30, -8 +; CHECK-MACHO: .cfi_offset w29, -16 +; CHECK-MACHO: .cfi_offset w19, -24 +; CHECK-MACHO: .cfi_offset w20, -32 +; CHECK-MACHO: .cfi_offset w21, -40 +; CHECK-MACHO: .cfi_offset w22, -48 +; Check correct access to arguments passed on the stack, through frame pointer +; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] +; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] +; Check correct reservation of 16-byte aligned VLA (size in w0) on stack +; and set-up of base pointer (x19). +; CHECK-MACHO: mov w9, w0 +; CHECK-MACHO: mov x10, sp +; CHECK-MACHO: lsl x9, x9, #2 +; CHECK-MACHO: add x9, x9, #15 +; CHECK-MACHO: and x9, x9, #0x7fffffff0 +; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9 +; CHECK-MACHO: mov sp, x[[VLASPTMP]] +; Check correct access to local variable, through base pointer +; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19] +; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] +; Check epilogue: +; Check that stack pointer get restored from frame pointer. +; CHECK-MACHO: sub sp, x29, #32 +; CHECK-MACHO: ldp x29, x30, [sp, #32] +; CHECK-MACHO: ldp x20, x19, [sp, #16] +; CHECK-MACHO: ldp x22, x21, [sp], #48 +; CHECK-MACHO: ret +; CHECK-MACHO: .cfi_endproc + ; Function Attrs: nounwind define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 { @@ -398,7 +496,7 @@ ; CHECK-LABEL: vla_dynamicrealign_nocall ; Check that used callee-saved registers are saved -; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: str x19, [sp, #-32]! ; Check that the frame pointer is created: ; CHECK: stp x29, x30, [sp, #16] ; CHECK: add x29, sp, #16 @@ -428,9 +526,44 @@ ; Check that stack pointer get restored from frame pointer. ; CHECK: sub sp, x29, #16 ; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x20, x19, [sp], #32 +; CHECK: ldr x19, [sp], #32 ; CHECK: ret +; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall: +; Check that used callee-saved registers are saved +; CHECK-MACHO: stp x20, x19, [sp, #-32]! +; Check that the frame pointer is created: +; CHECK-MACHO: stp x29, x30, [sp, #16] +; CHECK-MACHO: add x29, sp, #16 +; Check that the stack pointer gets re-aligned to 128 +; bytes & the base pointer (x19) gets initialized to +; this 128-byte aligned area for local variables & +; spill slots +; CHECK-MACHO: sub x9, sp, #96 +; CHECK-MACHO: and sp, x9, #0xffffffffffffff80 +; CHECK-MACHO: mov x19, sp +; Check correct access to arguments passed on the stack, through frame pointer +; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] +; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] +; Check correct reservation of 16-byte aligned VLA (size in w0) on stack +; and set-up of base pointer (x19). +; CHECK-MACHO: mov w9, w0 +; CHECK-MACHO: mov x10, sp +; CHECK-MACHO: lsl x9, x9, #2 +; CHECK-MACHO: add x9, x9, #15 +; CHECK-MACHO: and x9, x9, #0x7fffffff0 +; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9 +; CHECK-MACHO: mov sp, x[[VLASPTMP]] +; Check correct access to local variable, through base pointer +; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19] +; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] +; Check epilogue: +; Check that stack pointer get restored from frame pointer. +; CHECK-MACHO: sub sp, x29, #16 +; CHECK-MACHO: ldp x29, x30, [sp, #16] +; CHECK-MACHO: ldp x20, x19, [sp], #32 +; CHECK-MACHO: ret + ; Function Attrs: nounwind define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 { @@ -449,7 +582,7 @@ ; CHECK-LABEL: vla_dynamicrealign_nocall_large_align ; Check that used callee-saved registers are saved -; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: stp x28, x19, [sp, #-32]! ; Check that the frame pointer is created: ; CHECK: stp x29, x30, [sp, #16] ; CHECK: add x29, sp, #16 @@ -479,9 +612,44 @@ ; Check that stack pointer get restored from frame pointer. ; CHECK: sub sp, x29, #16 ; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x20, x19, [sp], #32 +; CHECK: ldp x28, x19, [sp], #32 ; CHECK: ret +; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall_large_align: +; Check that used callee-saved registers are saved +; CHECK-MACHO: stp x20, x19, [sp, #-32]! +; Check that the frame pointer is created: +; CHECK-MACHO: stp x29, x30, [sp, #16] +; CHECK-MACHO: add x29, sp, #16 +; Check that the stack pointer gets re-aligned to 128 +; bytes & the base pointer (x19) gets initialized to +; this 128-byte aligned area for local variables & +; spill slots +; CHECK-MACHO: sub x9, sp, #7, lsl #12 +; CHECK-MACHO: and sp, x9, #0xffffffffffff8000 +; CHECK-MACHO: mov x19, sp +; Check correct access to arguments passed on the stack, through frame pointer +; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] +; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] +; Check correct reservation of 16-byte aligned VLA (size in w0) on stack +; and set-up of base pointer (x19). +; CHECK-MACHO: mov w9, w0 +; CHECK-MACHO: mov x10, sp +; CHECK-MACHO: lsl x9, x9, #2 +; CHECK-MACHO: add x9, x9, #15 +; CHECK-MACHO: and x9, x9, #0x7fffffff0 +; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9 +; CHECK-MACHO: mov sp, x[[VLASPTMP]] +; Check correct access to local variable, through base pointer +; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19] +; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] +; Check epilogue: +; Check that stack pointer get restored from frame pointer. +; CHECK-MACHO: sub sp, x29, #16 +; CHECK-MACHO: ldp x29, x30, [sp, #16] +; CHECK-MACHO: ldp x20, x19, [sp], #32 +; CHECK-MACHO: ret + define void @realign_conditional(i1 %b) { entry: Index: test/CodeGen/AArch64/alloca.ll =================================================================== --- test/CodeGen/AArch64/alloca.ll +++ test/CodeGen/AArch64/alloca.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK +; RUN: llc -mtriple=arm64-apple-ios -disable-post-ra -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK-MACHO ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -mattr=-fp-armv8 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-NOFP-ARM64 %s declare void @use_addr(i8*) @@ -113,14 +114,21 @@ define void @test_alloca_large_frame(i64 %n) { ; CHECK-LABEL: test_alloca_large_frame: +; CHECK-MACHO-LABEL: test_alloca_large_frame: -; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: stp x28, x19, [sp, #-32]! ; CHECK: stp x29, x30, [sp, #16] ; CHECK: add x29, sp, #16 ; CHECK: sub sp, sp, #1953, lsl #12 ; CHECK: sub sp, sp, #512 +; CHECK-MACHO: stp x20, x19, [sp, #-32]! +; CHECK-MACHO: stp x29, x30, [sp, #16] +; CHECK-MACHO: add x29, sp, #16 +; CHECK-MACHO: sub sp, sp, #1953, lsl #12 +; CHECK-MACHO: sub sp, sp, #512 + %addr1 = alloca i8, i64 %n %addr2 = alloca i64, i64 1000000 @@ -130,7 +138,11 @@ ; CHECK: sub sp, x29, #16 ; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x20, x19, [sp], #32 +; CHECK: ldp x28, x19, [sp], #32 + +; CHECK-MACHO: sub sp, x29, #16 +; CHECK-MACHO: ldp x29, x30, [sp, #16] +; CHECK-MACHO: ldp x20, x19, [sp], #32 } declare i8* @llvm.stacksave() Index: test/CodeGen/AArch64/arm64-inline-asm.ll =================================================================== --- test/CodeGen/AArch64/arm64-inline-asm.ll +++ test/CodeGen/AArch64/arm64-inline-asm.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm64 -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s +; RUN: llc < %s -mtriple=arm64-apple-ios -aarch64-neon-syntax=apple -no-integrated-as -disable-post-ra | FileCheck %s ; rdar://9167275 Index: test/CodeGen/AArch64/arm64-register-pairing.ll =================================================================== --- test/CodeGen/AArch64/arm64-register-pairing.ll +++ test/CodeGen/AArch64/arm64-register-pairing.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple=arm64-apple-ios < %s | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck -check-prefix CHECK-NOTMACHO %s ; ; rdar://14075006 @@ -23,6 +24,19 @@ ; CHECK: ldp d11, d10, [sp, #32] ; CHECK: ldp d13, d12, [sp, #16] ; CHECK: ldp d15, d14, [sp], #144 + +; CHECK-NOTMACHO-LABEL: odd: +; CHECK-NOTMACHO: stp d14, d12, [sp, #-80]! +; CHECK-NOTMACHO: stp d10, d8, [sp, #16] +; CHECK-NOTMACHO: str x27, [sp, #32] +; CHECK-NOTMACHO: stp x25, x23, [sp, #48] +; CHECK-NOTMACHO: stp x21, x19, [sp, #64] +; CHECK-NOTMACHO: movz x0, #0x2a +; CHECK-NOTMACHO: ldp x21, x19, [sp, #64] +; CHECK-NOTMACHO: ldp x25, x23, [sp, #48] +; CHECK-NOTMACHO: ldr x27, [sp, #32] +; CHECK-NOTMACHO: ldp d10, d8, [sp, #16] +; CHECK-NOTMACHO: ldp d14, d12, [sp], #80 call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x21},~{x23},~{x25},~{x27},~{d8},~{d10},~{d12},~{d14}"() nounwind ret void } @@ -48,6 +62,19 @@ ; CHECK: ldp d11, d10, [sp, #32] ; CHECK: ldp d13, d12, [sp, #16] ; CHECK: ldp d15, d14, [sp], #144 + +; CHECK-NOTMACHO-LABEL: even: +; CHECK-NOTMACHO: stp d15, d13, [sp, #-80]! +; CHECK-NOTMACHO: stp d11, d9, [sp, #16] +; CHECK-NOTMACHO: str x28, [sp, #32] +; CHECK-NOTMACHO: stp x26, x24, [sp, #48] +; CHECK-NOTMACHO: stp x22, x20, [sp, #64] +; CHECK-NOTMACHO: movz x0, #0x2a +; CHECK-NOTMACHO: ldp x22, x20, [sp, #64] +; CHECK-NOTMACHO: ldp x26, x24, [sp, #48] +; CHECK-NOTMACHO: ldr x28, [sp, #32] +; CHECK-NOTMACHO: ldp d11, d9, [sp, #16] +; CHECK-NOTMACHO: ldp d15, d13, [sp], #80 call void asm sideeffect "mov x0, #42", "~{x0},~{x20},~{x22},~{x24},~{x26},~{x28},~{d9},~{d11},~{d13},~{d15}"() nounwind ret void }