Index: lib/Target/AArch64/AArch64FrameLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64FrameLowering.cpp +++ lib/Target/AArch64/AArch64FrameLowering.cpp @@ -675,21 +675,39 @@ int FrameIdx; int Offset; bool IsGPR; + bool isPaired() const { return Reg2 != AArch64::NoRegister; } }; -static void -computeCalleeSaveRegisterPairs(const std::vector &CSI, - const TargetRegisterInfo *TRI, - SmallVectorImpl &RegPairs) { +static void computeCalleeSaveRegisterPairs( + MachineFunction &MF, const std::vector &CSI, + const TargetRegisterInfo *TRI, SmallVectorImpl &RegPairs) { + if (CSI.empty()) + return; + + AArch64FunctionInfo *AFI = MF.getInfo(); + MachineFrameInfo *MFI = MF.getFrameInfo(); unsigned Count = CSI.size(); - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); + unsigned Offset = AFI->getCalleeSavedStackSize(); - for (unsigned i = 0; i < Count; i += 2) { - unsigned idx = Count - i - 2; + for (unsigned i = 0; i < Count; ++i) { RegPairInfo RPI; - RPI.Reg1 = CSI[idx].getReg(); - RPI.Reg2 = CSI[idx + 1].getReg(); + RPI.Reg1 = CSI[i].getReg(); + + if (AArch64::GPR64RegClass.contains(RPI.Reg1)) + RPI.IsGPR = true; + else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) + RPI.IsGPR = false; + else + llvm_unreachable("Unexpected callee saved register!"); + + // Add the next reg to the pair if it is in the same register class. + if (i + 1 < Count) { + unsigned NextReg = CSI[i + 1].getReg(); + if ((RPI.IsGPR && AArch64::GPR64RegClass.contains(NextReg)) || + (!RPI.IsGPR && AArch64::FPR64RegClass.contains(NextReg))) + RPI.Reg2 = NextReg; + } // GPRs and FPRs are saved in pairs of 64-bit regs. We expect the CSI // list to come in sorted by frame index so that we can issue the store @@ -697,26 +715,36 @@ // // The order of the registers in the list is controlled by // getCalleeSavedRegs(), so they will always be in-order, as well. 
- assert(CSI[idx].getFrameIdx() + 1 == CSI[idx + 1].getFrameIdx() && + assert((!RPI.isPaired() || + (CSI[i].getFrameIdx() + 1 == CSI[i + 1].getFrameIdx())) && "Out of order callee saved regs!"); - assert((Count & 1) == 0 && "Odd number of callee-saved regs to spill!"); - assert((i & 1) == 0 && "Odd index for callee-saved reg spill!"); - RPI.FrameIdx = CSI[idx + 1].getFrameIdx(); - - if (AArch64::GPR64RegClass.contains(RPI.Reg1)) - RPI.IsGPR = true; - else if (AArch64::FPR64RegClass.contains(RPI.Reg1)) - RPI.IsGPR = false; - else - llvm_unreachable("Unexpected callee saved register!"); - // Compute offset: i = 0 => offset = Count; - // i = 2 => offset = -(Count - 2) + Count = 2 = i; etc. - RPI.Offset = (i == 0) ? Count : i; + RPI.FrameIdx = CSI[i].getFrameIdx(); + + if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) { + // Round up size of non-pair to pair size if we need to pad the + // callee-save area to ensure 16-byte alignment. + Offset -= 16; + assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16); + MFI->setObjectSize(RPI.FrameIdx, 16); + } else + Offset -= RPI.isPaired() ? 16 : 8; + assert(Offset % 8 == 0); + RPI.Offset = Offset / 8; assert((RPI.Offset >= -64 && RPI.Offset <= 63) && "Offset out of bounds for LDP/STP immediate"); RegPairs.push_back(RPI); + if (RPI.isPaired()) + ++i; } + + // Align first offset to even 16-byte boundary to avoid additional SP + // adjustment instructions. + // Last pair offset is size of whole callee-save region for SP + // pre-dec/post-inc. 
+ RegPairInfo &LastPair = RegPairs.back(); + assert(AFI->getCalleeSavedStackSize() % 8 == 0); + LastPair.Offset = AFI->getCalleeSavedStackSize() / 8; } bool AArch64FrameLowering::spillCalleeSavedRegisters( @@ -728,9 +756,9 @@ DebugLoc DL; SmallVector RegPairs; - computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs); + computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); - for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE; + for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; ++RPII) { RegPairInfo RPI = *RPII; unsigned Reg1 = RPI.Reg1; @@ -746,36 +774,48 @@ // Rationale: This sequence saves uop updates compared to a sequence of // pre-increment spills like stp xi,xj,[sp,#-16]! // Note: Similar rationale and sequence for restores in epilog. - bool BumpSP = RPII == RegPairs.begin(); + bool BumpSP = RPII == RegPairs.rbegin(); if (RPI.IsGPR) { // For first spill use pre-increment store. if (BumpSP) - StrOpc = AArch64::STPXpre; + StrOpc = RPI.isPaired() ? AArch64::STPXpre : AArch64::STRXpre; else - StrOpc = AArch64::STPXi; + StrOpc = RPI.isPaired() ? AArch64::STPXi : AArch64::STRXui; } else { // For first spill use pre-increment store. if (BumpSP) - StrOpc = AArch64::STPDpre; + StrOpc = RPI.isPaired() ? AArch64::STPDpre : AArch64::STRDpre; else - StrOpc = AArch64::STPDi; + StrOpc = RPI.isPaired() ? AArch64::STPDi : AArch64::STRDui; } - DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1) << ", " - << TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx - << ", " << RPI.FrameIdx+1 << ")\n"); + DEBUG(dbgs() << "CSR spill: (" << TRI->getName(Reg1); + if (RPI.isPaired()) + dbgs() << ", " << TRI->getName(Reg2); + dbgs() << ") -> fi#(" << RPI.FrameIdx; + if (RPI.isPaired()) + dbgs() << ", " << RPI.FrameIdx+1; + dbgs() << ")\n"); const int Offset = BumpSP ? 
-RPI.Offset : RPI.Offset; MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc)); if (BumpSP) MIB.addReg(AArch64::SP, RegState::Define); - MBB.addLiveIn(Reg1); - MBB.addLiveIn(Reg2); - MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) + if (RPI.isPaired()) { + MBB.addLiveIn(Reg1); + MBB.addLiveIn(Reg2); + MIB.addReg(Reg2, getPrologueDeath(MF, Reg2)) .addReg(Reg1, getPrologueDeath(MF, Reg1)) .addReg(AArch64::SP) .addImm(Offset) // [sp, #offset * 8], where factor * 8 is implicit .setMIFlag(MachineInstr::FrameSetup); + } else { + MBB.addLiveIn(Reg1); + MIB.addReg(Reg1, getPrologueDeath(MF, Reg1)) + .addReg(AArch64::SP) + .addImm(BumpSP ? Offset * 8 : Offset) // pre-dec version is unscaled + .setMIFlag(MachineInstr::FrameSetup); + } } return true; } @@ -792,9 +832,9 @@ if (MI != MBB.end()) DL = MI->getDebugLoc(); - computeCalleeSaveRegisterPairs(CSI, TRI, RegPairs); + computeCalleeSaveRegisterPairs(MF, CSI, TRI, RegPairs); - for (auto RPII = RegPairs.rbegin(), RPIE = RegPairs.rend(); RPII != RPIE; + for (auto RPII = RegPairs.begin(), RPIE = RegPairs.end(); RPII != RPIE; ++RPII) { RegPairInfo RPI = *RPII; unsigned Reg1 = RPI.Reg1; @@ -808,33 +848,43 @@ // ldp x22, x21, [sp], #48 // addImm(+6) // Note: see comment in spillCalleeSavedRegisters() unsigned LdrOpc; - bool BumpSP = RPII == std::prev(RegPairs.rend()); + bool BumpSP = RPII == std::prev(RegPairs.end()); if (RPI.IsGPR) { if (BumpSP) - LdrOpc = AArch64::LDPXpost; + LdrOpc = RPI.isPaired() ? AArch64::LDPXpost : AArch64::LDRXpost; else - LdrOpc = AArch64::LDPXi; + LdrOpc = RPI.isPaired() ? AArch64::LDPXi : AArch64::LDRXui; } else { if (BumpSP) - LdrOpc = AArch64::LDPDpost; + LdrOpc = RPI.isPaired() ? AArch64::LDPDpost : AArch64::LDRDpost; else - LdrOpc = AArch64::LDPDi; + LdrOpc = RPI.isPaired() ? 
AArch64::LDPDi : AArch64::LDRDui; } - DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1) << ", " - << TRI->getName(Reg2) << ") -> fi#(" << RPI.FrameIdx - << ", " << RPI.FrameIdx+1 << ")\n"); + DEBUG(dbgs() << "CSR restore: (" << TRI->getName(Reg1); + if (RPI.isPaired()) + dbgs() << ", " << TRI->getName(Reg2); + dbgs() << ") -> fi#(" << RPI.FrameIdx; + if (RPI.isPaired()) + dbgs() << ", " << RPI.FrameIdx+1; + dbgs() << ")\n"); const int Offset = RPI.Offset; MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdrOpc)); if (BumpSP) MIB.addReg(AArch64::SP, RegState::Define); - MIB.addReg(Reg2, getDefRegState(true)) + if (RPI.isPaired()) + MIB.addReg(Reg2, getDefRegState(true)) .addReg(Reg1, getDefRegState(true)) .addReg(AArch64::SP) .addImm(Offset) // [sp], #offset * 8 or [sp, #offset * 8] // where the factor * 8 is implicit .setMIFlag(MachineInstr::FrameDestroy); + else + MIB.addReg(Reg1, getDefRegState(true)) + .addReg(AArch64::SP) + .addImm(BumpSP ? Offset * 8 : Offset) // post-inc version is unscaled + .setMIFlag(MachineInstr::FrameDestroy); } return true; } @@ -876,60 +926,27 @@ DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:"); const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); - // Check pairs of consecutive callee-saved registers. - for (unsigned i = 0; CSRegs[i]; i += 2) { - assert(CSRegs[i + 1] && "Odd number of callee-saved registers!"); - - const unsigned OddReg = CSRegs[i]; - const unsigned EvenReg = CSRegs[i + 1]; - assert((AArch64::GPR64RegClass.contains(OddReg) && - AArch64::GPR64RegClass.contains(EvenReg)) ^ - (AArch64::FPR64RegClass.contains(OddReg) && - AArch64::FPR64RegClass.contains(EvenReg)) && - "Register class mismatch!"); - - const bool OddRegUsed = SavedRegs.test(OddReg); - const bool EvenRegUsed = SavedRegs.test(EvenReg); - - // Early exit if none of the registers in the register pair is actually - // used. 
- if (!OddRegUsed && !EvenRegUsed) { - if (AArch64::GPR64RegClass.contains(OddReg)) { - UnspilledCSGPRs.push_back(OddReg); - UnspilledCSGPRs.push_back(EvenReg); - } else { - UnspilledCSFPRs.push_back(OddReg); - UnspilledCSFPRs.push_back(EvenReg); - } - continue; - } + // Figure out which callee-saved registers to save/restore. + for (unsigned i = 0; CSRegs[i]; ++i) { + const unsigned Reg = CSRegs[i]; + bool RegUsed = SavedRegs.test(Reg); - unsigned Reg = AArch64::NoRegister; - // If only one of the registers of the register pair is used, make sure to - // mark the other one as used as well. - if (OddRegUsed ^ EvenRegUsed) { - // Find out which register is the additional spill. - Reg = OddRegUsed ? EvenReg : OddReg; - SavedRegs.set(Reg); + if (AArch64::GPR64RegClass.contains(Reg)) { + if (RegUsed) + ++NumGPRSpilled; + else + UnspilledCSGPRs.push_back(Reg); + } else { + if (RegUsed) + ++NumFPRSpilled; + else + UnspilledCSFPRs.push_back(Reg); } - DEBUG(dbgs() << ' ' << PrintReg(OddReg, RegInfo)); - DEBUG(dbgs() << ' ' << PrintReg(EvenReg, RegInfo)); - - assert(((OddReg == AArch64::LR && EvenReg == AArch64::FP) || - (RegInfo->getEncodingValue(OddReg) + 1 == - RegInfo->getEncodingValue(EvenReg))) && - "Register pair of non-adjacent registers!"); - if (AArch64::GPR64RegClass.contains(OddReg)) { - NumGPRSpilled += 2; - // If it's not a reserved register, we can use it in lieu of an - // emergency spill slot for the register scavenger. - // FIXME: It would be better to instead keep looking and choose another - // unspilled register that isn't reserved, if there is one. - if (Reg != AArch64::NoRegister && !RegInfo->isReservedReg(MF, Reg)) - ExtraCSSpill = true; - } else - NumFPRSpilled += 2; + if (!RegUsed) + continue; + + DEBUG(dbgs() << ' ' << PrintReg(Reg, RegInfo)); CanEliminateFrame = false; } @@ -955,21 +972,20 @@ // register scavenging. 
If we already spilled an extra callee-saved register // above to keep the number of spills even, we don't need to do anything else // here. - if (BigStack && !ExtraCSSpill) { - - // If we're adding a register to spill here, we have to add two of them - // to keep the number of regs to spill even. - assert(((UnspilledCSGPRs.size() & 1) == 0) && "Odd number of registers!"); - unsigned Count = 0; - while (!UnspilledCSGPRs.empty() && Count < 2) { - unsigned Reg = UnspilledCSGPRs.back(); - UnspilledCSGPRs.pop_back(); - DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo) - << " to get a scratch register.\n"); - SavedRegs.set(Reg); - ExtraCSSpill = true; - ++Count; - ++NumGPRSpilled; + if (BigStack) { + + for (auto RegI = UnspilledCSGPRs.rbegin(), RegE = UnspilledCSGPRs.rend(); + RegI != RegE; ++RegI) { + unsigned Reg = *RegI; + if (!RegInfo->isReservedReg(MF, Reg)) { + DEBUG(dbgs() << "Spilling " << PrintReg(Reg, RegInfo) + << " to get a scratch register.\n"); + SavedRegs.set(Reg); + ExtraCSSpill = true; + ++NumGPRSpilled; + UnspilledCSGPRs.erase(std::next(RegI).base()); + break; + } } // If we didn't find an extra callee-saved register to spill, create @@ -983,5 +999,8 @@ } } - AFI->setCalleeSavedStackSize(8 * (NumGPRSpilled + NumFPRSpilled)); + // Round up to register pair alignment to avoid additional SP adjustment + // instructions. + AFI->setCalleeSavedStackSize( + alignTo(8 * (NumGPRSpilled + NumFPRSpilled), 16)); } Index: test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll =================================================================== --- test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll +++ test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll @@ -97,7 +97,7 @@ ; CHECK-LABEL: novla_nodynamicrealign_call ; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: str x19, [sp, #-32]! 
; Check that the frame pointer is created: ; CHECK: stp x29, x30, [sp, #16] ; CHECK: add x29, sp, #16 @@ -105,8 +105,7 @@ ; CHECK: .cfi_def_cfa w29, 16 ; CHECK: .cfi_offset w30, -8 ; CHECK: .cfi_offset w29, -16 -; CHECK: .cfi_offset w19, -24 -; CHECK: .cfi_offset w20, -32 +; CHECK: .cfi_offset w19, -32 ; Check correct access to arguments passed on the stack, through frame pointer ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] @@ -114,7 +113,7 @@ ; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12] ; Check epilogue: ; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x20, x19, [sp], #32 +; CHECK: ldr x19, [sp], #32 ; CHECK: ret ; CHECK: .cfi_endproc @@ -159,7 +158,7 @@ ; CHECK-LABEL: novla_dynamicrealign_call ; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: str x19, [sp, #-32]! ; Check that the frame pointer is created: ; CHECK: stp x29, x30, [sp, #16] ; CHECK: add x29, sp, #16 @@ -170,8 +169,7 @@ ; CHECK: .cfi_def_cfa w29, 16 ; CHECK: .cfi_offset w30, -8 ; CHECK: .cfi_offset w29, -16 -; CHECK: .cfi_offset w19, -24 -; CHECK: .cfi_offset w20, -32 +; CHECK: .cfi_offset w19, -32 ; Check correct access to arguments passed on the stack, through frame pointer ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] @@ -181,7 +179,7 @@ ; Check that stack pointer get restored from frame pointer. ; CHECK: sub sp, x29, #16 // =16 ; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x20, x19, [sp], #32 +; CHECK: ldr x19, [sp], #32 ; CHECK: ret ; CHECK: .cfi_endproc @@ -336,7 +334,7 @@ ; CHECK-LABEL: vla_dynamicrealign_call ; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: stp x22, x21, [sp, #-48]! +; CHECK: str x21, [sp, #-48]! 
; CHECK: stp x20, x19, [sp, #16] ; Check that the frame pointer is created: ; CHECK: stp x29, x30, [sp, #32] @@ -354,8 +352,7 @@ ; CHECK: .cfi_offset w29, -16 ; CHECK: .cfi_offset w19, -24 ; CHECK: .cfi_offset w20, -32 -; CHECK: .cfi_offset w21, -40 -; CHECK: .cfi_offset w22, -48 +; CHECK: .cfi_offset w21, -48 ; Check correct access to arguments passed on the stack, through frame pointer ; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] ; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] @@ -376,7 +373,7 @@ ; CHECK: sub sp, x29, #32 ; CHECK: ldp x29, x30, [sp, #32] ; CHECK: ldp x20, x19, [sp, #16] -; CHECK: ldp x22, x21, [sp], #48 +; CHECK: ldr x21, [sp], #48 ; CHECK: ret ; CHECK: .cfi_endproc @@ -398,7 +395,7 @@ ; CHECK-LABEL: vla_dynamicrealign_nocall ; Check that used callee-saved registers are saved -; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: str x19, [sp, #-32]! ; Check that the frame pointer is created: ; CHECK: stp x29, x30, [sp, #16] ; CHECK: add x29, sp, #16 @@ -428,7 +425,7 @@ ; Check that stack pointer get restored from frame pointer. ; CHECK: sub sp, x29, #16 ; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x20, x19, [sp], #32 +; CHECK: ldr x19, [sp], #32 ; CHECK: ret @@ -449,7 +446,7 @@ ; CHECK-LABEL: vla_dynamicrealign_nocall_large_align ; Check that used callee-saved registers are saved -; CHECK: stp x20, x19, [sp, #-32]! +; CHECK: stp x28, x19, [sp, #-32]! ; Check that the frame pointer is created: ; CHECK: stp x29, x30, [sp, #16] ; CHECK: add x29, sp, #16 @@ -479,7 +476,7 @@ ; Check that stack pointer get restored from frame pointer. ; CHECK: sub sp, x29, #16 ; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x20, x19, [sp], #32 +; CHECK: ldp x28, x19, [sp], #32 ; CHECK: ret Index: test/CodeGen/AArch64/alloca.ll =================================================================== --- test/CodeGen/AArch64/alloca.ll +++ test/CodeGen/AArch64/alloca.ll @@ -115,7 +115,7 @@ ; CHECK-LABEL: test_alloca_large_frame: -; CHECK: stp x20, x19, [sp, #-32]! 
+; CHECK: stp x28, x19, [sp, #-32]! ; CHECK: stp x29, x30, [sp, #16] ; CHECK: add x29, sp, #16 ; CHECK: sub sp, sp, #1953, lsl #12 @@ -130,7 +130,7 @@ ; CHECK: sub sp, x29, #16 ; CHECK: ldp x29, x30, [sp, #16] -; CHECK: ldp x20, x19, [sp], #32 +; CHECK: ldp x28, x19, [sp], #32 } declare i8* @llvm.stacksave() Index: test/CodeGen/AArch64/arm64-frame-index.ll =================================================================== --- test/CodeGen/AArch64/arm64-frame-index.ll +++ test/CodeGen/AArch64/arm64-frame-index.ll @@ -5,7 +5,7 @@ entry: ; CHECK-LABEL: t1: ; CHECK-NOT: add x{{[0-9]+}}, sp -; CHECK: stp x28, x27, [sp, #-16]! +; CHECK: str x28, [sp, #-16]! %v = alloca [288 x i32], align 4 unreachable } Index: test/CodeGen/AArch64/arm64-inline-asm.ll =================================================================== --- test/CodeGen/AArch64/arm64-inline-asm.ll +++ test/CodeGen/AArch64/arm64-inline-asm.ll @@ -72,7 +72,7 @@ define void @t8() nounwind ssp { entry: ; CHECK-LABEL: t8: -; CHECK: stp {{d[0-9]+}}, {{d[0-9]+}}, [sp, #-16] +; CHECK: str d8, [sp, #-16] tail call void asm sideeffect "nop", "~{v8}"() nounwind ret void } Index: test/CodeGen/AArch64/arm64-register-pairing.ll =================================================================== --- test/CodeGen/AArch64/arm64-register-pairing.ll +++ test/CodeGen/AArch64/arm64-register-pairing.ll @@ -4,50 +4,34 @@ define void @odd() nounwind { ; CHECK-LABEL: odd: -; CHECK: stp d15, d14, [sp, #-144]! -; CHECK: stp d13, d12, [sp, #16] -; CHECK: stp d11, d10, [sp, #32] -; CHECK: stp d9, d8, [sp, #48] -; CHECK: stp x28, x27, [sp, #64] -; CHECK: stp x26, x25, [sp, #80] -; CHECK: stp x24, x23, [sp, #96] -; CHECK: stp x22, x21, [sp, #112] -; CHECK: stp x20, x19, [sp, #128] +; CHECK: stp d14, d12, [sp, #-80]! 
+; CHECK: stp d10, d8, [sp, #16] +; CHECK: str x27, [sp, #32] +; CHECK: stp x25, x23, [sp, #48] +; CHECK: stp x21, x19, [sp, #64] ; CHECK: movz x0, #0x2a -; CHECK: ldp x20, x19, [sp, #128] -; CHECK: ldp x22, x21, [sp, #112] -; CHECK: ldp x24, x23, [sp, #96] -; CHECK: ldp x26, x25, [sp, #80] -; CHECK: ldp x28, x27, [sp, #64] -; CHECK: ldp d9, d8, [sp, #48] -; CHECK: ldp d11, d10, [sp, #32] -; CHECK: ldp d13, d12, [sp, #16] -; CHECK: ldp d15, d14, [sp], #144 +; CHECK: ldp x21, x19, [sp, #64] +; CHECK: ldp x25, x23, [sp, #48] +; CHECK: ldr x27, [sp, #32] +; CHECK: ldp d10, d8, [sp, #16] +; CHECK: ldp d14, d12, [sp], #80 call void asm sideeffect "mov x0, #42", "~{x0},~{x19},~{x21},~{x23},~{x25},~{x27},~{d8},~{d10},~{d12},~{d14}"() nounwind ret void } define void @even() nounwind { ; CHECK-LABEL: even: -; CHECK: stp d15, d14, [sp, #-144]! -; CHECK: stp d13, d12, [sp, #16] -; CHECK: stp d11, d10, [sp, #32] -; CHECK: stp d9, d8, [sp, #48] -; CHECK: stp x28, x27, [sp, #64] -; CHECK: stp x26, x25, [sp, #80] -; CHECK: stp x24, x23, [sp, #96] -; CHECK: stp x22, x21, [sp, #112] -; CHECK: stp x20, x19, [sp, #128] +; CHECK: stp d15, d13, [sp, #-80]! 
+; CHECK: stp d11, d9, [sp, #16] +; CHECK: str x28, [sp, #32] +; CHECK: stp x26, x24, [sp, #48] +; CHECK: stp x22, x20, [sp, #64] ; CHECK: movz x0, #0x2a -; CHECK: ldp x20, x19, [sp, #128] -; CHECK: ldp x22, x21, [sp, #112] -; CHECK: ldp x24, x23, [sp, #96] -; CHECK: ldp x26, x25, [sp, #80] -; CHECK: ldp x28, x27, [sp, #64] -; CHECK: ldp d9, d8, [sp, #48] -; CHECK: ldp d11, d10, [sp, #32] -; CHECK: ldp d13, d12, [sp, #16] -; CHECK: ldp d15, d14, [sp], #144 +; CHECK: ldp x22, x20, [sp, #64] +; CHECK: ldp x26, x24, [sp, #48] +; CHECK: ldr x28, [sp, #32] +; CHECK: ldp d11, d9, [sp, #16] +; CHECK: ldp d15, d13, [sp], #80 call void asm sideeffect "mov x0, #42", "~{x0},~{x20},~{x22},~{x24},~{x26},~{x28},~{d9},~{d11},~{d13},~{d15}"() nounwind ret void } Index: test/CodeGen/AArch64/arm64-shrink-wrapping.ll =================================================================== --- test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -405,7 +405,7 @@ ; ; Prologue code. ; Make sure we save the CSR used in the inline asm: x19. -; CHECK: stp [[CSR1:x[0-9]+]], [[CSR2:x19]], [sp, #-16]! +; CHECK: str [[CSR:x19]], [sp, #-16]! ; ; DISABLE: cbz w0, [[ELSE_LABEL:LBB[0-9_]+]] ; @@ -419,13 +419,13 @@ ; Next BB. ; CHECK: mov w0, wzr ; Epilogue code. -; CHECK-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16 +; CHECK-NEXT: ldr [[CSR]], [sp], #16 ; CHECK-NEXT: ret ; Next BB. ; CHECK: [[ELSE_LABEL]]: ; %if.else ; CHECK-NEXT: lsl w0, w1, #1 ; Epilogue code. -; DISABLE-NEXT: ldp [[CSR1]], [[CSR2]], [sp], #16 +; DISABLE-NEXT: ldr [[CSR]], [sp], #16 ; CHECK-NEXT: ret define i32 @inlineAsm(i32 %cond, i32 %N) { entry: