Index: clang/docs/ClangCommandLineReference.rst
===================================================================
--- clang/docs/ClangCommandLineReference.rst
+++ clang/docs/ClangCommandLineReference.rst
@@ -2430,6 +2430,13 @@
 ARM
 ---
+.. option:: -ffixed-r4
+
+Reserve the r4 register (ARM only)
+
+.. option:: -ffixed-r5
+
+Reserve the r5 register (ARM only)
 
 .. option:: -ffixed-r6
Index: clang/include/clang/Driver/Options.td
===================================================================
--- clang/include/clang/Driver/Options.td
+++ clang/include/clang/Driver/Options.td
@@ -2263,7 +2263,7 @@
 def mno_restrict_it: Flag<["-"], "mno-restrict-it">, Group<m_arm_Features_Group>,
   HelpText<"Allow generation of deprecated IT blocks for ARMv8. It is off by default for ARMv8 Thumb mode">;
 def marm : Flag<["-"], "marm">, Alias<mno_thumb>;
-foreach i = {6-11} in
+foreach i = {4-11} in
   def ffixed_r#i : Flag<["-"], "ffixed-r"#i>, Group<m_arm_Features_Group>,
     HelpText<"Reserve the r"#i#" register (ARM only)">;
 def mno_movt : Flag<["-"], "mno-movt">, Group<m_arm_Features_Group>,
Index: clang/lib/Driver/ToolChains/Arch/ARM.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Arch/ARM.cpp
+++ clang/lib/Driver/ToolChains/Arch/ARM.cpp
@@ -619,6 +619,8 @@
 #define HANDLE_FFIXED_R(n)                                                     \
   if (Args.hasArg(options::OPT_ffixed_r##n))                                   \
   Features.push_back("+reserve-r" #n)
+  HANDLE_FFIXED_R(4);
+  HANDLE_FFIXED_R(5);
   HANDLE_FFIXED_R(6);
   HANDLE_FFIXED_R(7);
   HANDLE_FFIXED_R(8);
Index: llvm/lib/Target/ARM/ARM.td
===================================================================
--- llvm/lib/Target/ARM/ARM.td
+++ llvm/lib/Target/ARM/ARM.td
@@ -391,7 +391,7 @@
                                          "Enable the generation of "
                                          "execute only code.">;
 
-foreach i = {6-11} in
+foreach i = {4-11} in
   def FeatureReserveR#i : SubtargetFeature<"reserve-r"#i,
                                            "ReservedGPRegisters["#i#"]", "true",
                                            "Reserve R"#i#", making it "
Index: llvm/lib/Target/ARM/ARMFrameLowering.h
===================================================================
--- llvm/lib/Target/ARM/ARMFrameLowering.h
+++ llvm/lib/Target/ARM/ARMFrameLowering.h
@@ -9,6 +9,7 @@
 #ifndef LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
 #define LLVM_LIB_TARGET_ARM_ARMFRAMELOWERING_H
 
+#include "llvm/ADT/BitVector.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include <vector>
@@ -71,6 +72,11 @@
     return false;
   }
 
+  // Return a non-reserved general purpose register that can be used as a
+  // scratch register.
+  unsigned getScratchRegister(const MachineFunction &MF,
+                              BitVector inUse = BitVector()) const;
+
 private:
   void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
                     const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
Index: llvm/lib/Target/ARM/ARMFrameLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -73,7 +73,7 @@
 
 static MachineBasicBlock::iterator
 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
-                        unsigned NumAlignedDPRCS2Regs);
+                        unsigned NumAlignedDPRCS2Regs, unsigned ScratchReg);
 
 ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)
     : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
@@ -133,6 +133,25 @@
   return !MFI.hasVarSizedObjects();
 }
 
+// Decide which register out of r4-r11 can be used as a scratch register,
+// based on whether it is free according to RegisterInfo and not excluded by
+// InUse.
+unsigned ARMFrameLowering::getScratchRegister(const MachineFunction &MF,
+                                              BitVector InUse) const {
+  bool HasFP = hasFP(MF);
+  Register FramePtr =
+      STI.getInstrInfo()->getRegisterInfo().getFrameRegister(MF);
+  unsigned R4 = ARM::R4 - ARM::GPRRegClass.begin()[0];
+  for (unsigned i = R4; i < ARM::GPRRegClass.getNumRegs() - R4; i++) {
+    unsigned Reg = ARM::GPRRegClass.getRegister(i);
+    if (!STI.isGPRegisterReserved(i) && !(i < InUse.size() && InUse[i]) &&
+        !(HasFP && Reg == FramePtr))
+      return Reg;
+  }
+  report_fatal_error("ran out of registers: Too many registers reserved");
+  return ARM::NoRegister;
+}
+
 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
 /// call frame pseudos can be simplified. Unlike most targets, having a FP
 /// is not sufficient here since we still may reference some objects via SP
@@ -506,7 +524,8 @@
 
   // Move past the aligned DPRCS2 area.
   if (AFI->getNumAlignedDPRCS2Regs() > 0) {
-    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());
+    MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs(),
+                                   getScratchRegister(MF));
     // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
     // leaves the stack pointer pointing to the DPRCS2 area.
     //
@@ -516,6 +535,22 @@
     NumBytes = DPRCSOffset;
 
   if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
+    // r4 might be used as a global named register, which means its value has
+    // not been and must not be saved to the stack. Thus temporarily save its
+    // content in another register so r4 is free to be used for __chkstk.
+    unsigned BackupR4 = ARM::NoRegister;
+    if (STI.isGPRegisterReserved(ARM::R4 - ARM::GPRRegClass.begin()[0])) {
+      if (!STI.hasV6Ops())
+        // Do not allow r4 to be reserved if stack probes are needed on a
+        // target where 'mov low_reg, low_reg' is not possible.
+        report_fatal_error("-ffixed-r4 is not allowed for this target when "
+                           "stack probes are in use.");
+      BackupR4 = getScratchRegister(MF);
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BackupR4)
+          .addReg(ARM::R4)
+          .add(predOps(ARMCC::AL));
+    }
+
     uint32_t NumWords = NumBytes >> 2;
 
     if (NumWords < 65536)
@@ -560,6 +595,13 @@
           .add(predOps(ARMCC::AL))
           .add(condCodeOp());
     NumBytes = 0;
+
+    // If r4 is actually a fixed register, restore it.
+    if (BackupR4) {
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+          .addReg(BackupR4)
+          .add(predOps(ARMCC::AL));
+    }
   }
 
   if (NumBytes) {
@@ -723,20 +765,21 @@
       emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
                                false);
     } else {
-      // We cannot use sp as source/dest register here, thus we're using r4 to
-      // perform the calculations. We're emitting the following sequence:
+      // We cannot use sp as source/dest register here, thus we're using a
+      // scratch register to perform the calculations. We're emitting the
+      // following sequence (e.g. ScratchReg = r4):
       // mov r4, sp
       // -- use emitAligningInstructions to produce best sequence to zero
       // -- out lower bits in r4
       // mov sp, r4
-      // FIXME: It will be better just to find spare register here.
-      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
+      unsigned ScratchReg = getScratchRegister(MF);
+      BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ScratchReg)
          .addReg(ARM::SP, RegState::Kill)
          .add(predOps(ARMCC::AL));
-      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
-                               false);
+      emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ScratchReg,
+                               MaxAlign, false);
       BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
-        .addReg(ARM::R4, RegState::Kill)
+        .addReg(ScratchReg, RegState::Kill)
         .add(predOps(ARMCC::AL));
     }
 
@@ -827,13 +870,14 @@
       // sub sp, #24
       // This is bad, if an interrupt is taken after the mov, sp is in an
       // inconsistent state.
-      // Use the first callee-saved register as a scratch register.
-      assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
+      // Use a free scratch register to restore SP from FP.
+      unsigned ScratchReg = getScratchRegister(MF);
+      assert(!MFI.getPristineRegs(MF).test(ScratchReg) &&
              "No scratch register to restore SP from FP!");
-      emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
+      emitT2RegPlusImmediate(MBB, MBBI, dl, ScratchReg, FramePtr, -NumBytes,
                              ARMCC::AL, 0, TII);
       BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
-          .addReg(ARM::R4)
+          .addReg(ScratchReg)
          .add(predOps(ARMCC::AL));
     }
   } else {
@@ -1163,7 +1207,8 @@
                                     MachineBasicBlock::iterator MI,
                                     unsigned NumAlignedDPRCS2Regs,
                                     const std::vector<CalleeSavedInfo> &CSI,
-                                    const TargetRegisterInfo *TRI) {
+                                    const TargetRegisterInfo *TRI,
+                                    unsigned ScratchReg) {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
@@ -1192,8 +1237,8 @@
     MFI.setObjectAlignment(FI, MFI.getMaxAlignment());
   }
 
-  // Move the stack pointer to the d8 spill slot, and align it at the same
-  // time. Leave the stack slot address in the scratch register r4.
+  // Move the stack pointer to the d8 spill slot, and align it at the same
+  // time. Leave the stack slot address in the scratch register (e.g. r4).
   //
   // sub r4, sp, #numregs * 8
   // bic r4, r4, #align - 1
   // mov sp, r4
@@ -1206,7 +1251,7 @@
   // sub r4, sp, #numregs * 8
   // The immediate is <= 64, so it doesn't need any special encoding.
   unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
-  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+  BuildMI(MBB, MI, DL, TII.get(Opc), ScratchReg)
      .addReg(ARM::SP)
      .addImm(8 * NumAlignedDPRCS2Regs)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
@@ -1218,21 +1263,22 @@
   // stack alignment. Luckily, this can always be done since all ARM
   // architecture versions that support Neon also support the BFC
   // instruction.
-  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
+  emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ScratchReg, MaxAlign,
+                           true);
 
   // mov sp, r4
   // The stack pointer must be adjusted before spilling anything, otherwise
   // the stack slots could be clobbered by an interrupt handler.
-  // Leave r4 live, it is used below.
+  // Leave the scratch register live, it is used below.
   Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
   MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
-                                .addReg(ARM::R4)
+                                .addReg(ScratchReg)
                                 .add(predOps(ARMCC::AL));
   if (!isThumb)
     MIB.add(condCodeOp());
 
   // Now spill NumAlignedDPRCS2Regs registers starting from d8.
-  // r4 holds the stack slot address.
+  // The scratch register holds the stack slot address.
   unsigned NextReg = ARM::D8;
 
   // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
@@ -1241,8 +1287,8 @@
     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                                &ARM::QQPRRegClass);
     MBB.addLiveIn(SupReg);
-    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
-        .addReg(ARM::R4, RegState::Kill)
+    BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ScratchReg)
+        .addReg(ScratchReg, RegState::Kill)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
@@ -1251,8 +1297,8 @@
     NumAlignedDPRCS2Regs -= 4;
   }
 
-  // We won't modify r4 beyond this point. It currently points to the next
-  // register to be spilled.
+  // We won't modify the scratch register beyond this point. It currently
+  // points to the next register to be spilled.
   unsigned R4BaseReg = NextReg;
 
   // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
@@ -1261,7 +1307,7 @@
                                                &ARM::QQPRRegClass);
     MBB.addLiveIn(SupReg);
     BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
-        .addReg(ARM::R4)
+        .addReg(ScratchReg)
        .addImm(16)
        .addReg(NextReg)
        .addReg(SupReg, RegState::ImplicitKill)
@@ -1276,7 +1322,7 @@
                                                &ARM::QPRRegClass);
     MBB.addLiveIn(SupReg);
     BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
-        .addReg(ARM::R4)
+        .addReg(ScratchReg)
        .addImm(16)
        .addReg(SupReg)
        .add(predOps(ARMCC::AL));
@@ -1290,23 +1336,24 @@
     // vstr.64 uses addrmode5 which has an offset scale of 4.
     BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
         .addReg(NextReg)
-        .addReg(ARM::R4)
+        .addReg(ScratchReg)
        .addImm((NextReg - R4BaseReg) * 2)
        .add(predOps(ARMCC::AL));
   }
 
-  // The last spill instruction inserted should kill the scratch register r4.
-  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
+  // The last spill instruction inserted should kill the scratch register.
+  std::prev(MI)->addRegisterKilled(ScratchReg, TRI);
 }
 
 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
 /// iterator to the following instruction.
 static MachineBasicBlock::iterator
 skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,
-                        unsigned NumAlignedDPRCS2Regs) {
+                        unsigned NumAlignedDPRCS2Regs, unsigned ScratchReg) {
   // sub r4, sp, #numregs * 8
   // bic r4, r4, #align - 1
   // mov sp, r4
+  // (e.g. ScratchReg = r4)
   ++MI; ++MI; ++MI;
   assert(MI->mayStore() && "Expecting spill instruction");
 
@@ -1323,7 +1370,7 @@
   case 1:
   case 2:
   case 4:
-    assert(MI->killsRegister(ARM::R4) && "Missed kill flag");
+    assert(MI->killsRegister(ScratchReg) && "Missed kill flag");
     ++MI;
   }
   return MI;
@@ -1336,7 +1383,8 @@
                                       MachineBasicBlock::iterator MI,
                                       unsigned NumAlignedDPRCS2Regs,
                                       const std::vector<CalleeSavedInfo> &CSI,
-                                      const TargetRegisterInfo *TRI) {
+                                      const TargetRegisterInfo *TRI,
+                                      unsigned ScratchReg) {
   MachineFunction &MF = *MBB.getParent();
   ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
   DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
@@ -1350,7 +1398,7 @@
       break;
   }
 
-  // Materialize the address of the d8 spill slot into the scratch register r4.
+  // Materialize the address of the d8 spill slot into the scratch register.
   // This can be fairly complicated if the stack frame is large, so just use
   // the normal frame index elimination mechanism to do it. This code runs as
   // the initial part of the epilog where the stack and base pointers haven't
@@ -1359,7 +1407,7 @@
   assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
 
   unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
-  BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
+  BuildMI(MBB, MI, DL, TII.get(Opc), ScratchReg)
      .addFrameIndex(D8SpillFI)
      .addImm(0)
      .add(predOps(ARMCC::AL))
@@ -1373,8 +1421,8 @@
     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                                &ARM::QQPRRegClass);
     BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
-        .addReg(ARM::R4, RegState::Define)
-        .addReg(ARM::R4, RegState::Kill)
+        .addReg(ScratchReg, RegState::Define)
+        .addReg(ScratchReg, RegState::Kill)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
     NextReg += 4;
@@ -1382,8 +1430,8 @@
     NumAlignedDPRCS2Regs -= 4;
   }
 
-  // We won't modify r4 beyond this point. It currently points to the next
-  // register to be spilled.
+  // We won't modify the scratch register beyond this point. It currently
+  // points to the next register to be reloaded.
   unsigned R4BaseReg = NextReg;
 
   // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
@@ -1391,7 +1439,7 @@
     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                                &ARM::QQPRRegClass);
     BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
-        .addReg(ARM::R4)
+        .addReg(ScratchReg)
        .addImm(16)
        .addReg(SupReg, RegState::ImplicitDefine)
        .add(predOps(ARMCC::AL));
@@ -1404,7 +1452,7 @@
     unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
                                                &ARM::QPRRegClass);
     BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
-        .addReg(ARM::R4)
+        .addReg(ScratchReg)
        .addImm(16)
        .add(predOps(ARMCC::AL));
     NextReg += 2;
@@ -1414,12 +1462,12 @@
   // Finally, use a vanilla vldr.64 for the remaining odd register.
   if (NumAlignedDPRCS2Regs)
     BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
-        .addReg(ARM::R4)
+        .addReg(ScratchReg)
        .addImm(2 * (NextReg - R4BaseReg))
        .add(predOps(ARMCC::AL));
 
-  // Last store kills r4.
-  std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
+  // The last reload kills the scratch register.
+  std::prev(MI)->addRegisterKilled(ScratchReg, TRI);
 }
 
 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
@@ -1448,7 +1496,8 @@
   // The stack realignment code will be inserted between the push instructions
   // and these spills.
   if (NumAlignedDPRCS2Regs)
-    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
+    emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI,
+                            getScratchRegister(MF));
 
   return true;
 }
@@ -1468,7 +1517,8 @@
   // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
   // registers. Do that here instead.
   if (NumAlignedDPRCS2Regs)
-    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
+    emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI,
+                              getScratchRegister(MF));
 
   unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
   unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM;
@@ -1586,8 +1636,8 @@
 // In functions that realign the stack, it can be an advantage to spill the
 // callee-saved vector registers after realigning the stack. The vst1 and vld1
 // instructions take alignment hints that can improve performance.
-static void
-checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {
+static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs,
+                                      unsigned ScratchReg) {
   MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
   if (!SpillAlignedNEONRegs)
     return;
@@ -1627,7 +1677,7 @@
   MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
 
   // A scratch register is required for the vst1 / vld1 instructions.
-  SavedRegs.set(ARM::R4);
+  SavedRegs.set(ScratchReg);
 }
 
 void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -1656,23 +1706,23 @@
   (void)TRI;  // Silence unused warning in non-assert builds.
   Register FramePtr = RegInfo->getFrameRegister(MF);
 
-  // Spill R4 if Thumb2 function requires stack realignment - it will be used as
-  // scratch register. Also spill R4 if Thumb2 function has varsized objects,
-  // since it's not always possible to restore sp from fp in a single
-  // instruction.
-  // FIXME: It will be better just to find spare register here.
+  unsigned ScratchReg = getScratchRegister(MF);
+
+  // Spill the scratch register if a Thumb2 function requires stack
+  // realignment. Also spill it if a Thumb2 function has varsized objects,
+  // since it's not always possible to restore sp from fp in a single
+  // instruction.
   if (AFI->isThumb2Function() &&
       (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
-    SavedRegs.set(ARM::R4);
+    SavedRegs.set(ScratchReg);
 
-  // If a stack probe will be emitted, spill R4 and LR, since they are
-  // clobbered by the stack probe call.
+  // If a stack probe will be emitted, spill the scratch register and LR,
+  // since they are clobbered by the stack probe call.
   // This estimate should be a safe, conservative estimate. The actual
   // stack probe is enabled based on the size of the local objects;
   // this estimate also includes the varargs store size.
   if (STI.isTargetWindows() &&
       WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
-    SavedRegs.set(ARM::R4);
+    SavedRegs.set(ScratchReg);
     SavedRegs.set(ARM::LR);
   }
 
@@ -1681,19 +1731,19 @@
     if (AFI->getArgRegsSaveSize() > 0)
       SavedRegs.set(ARM::LR);
 
-    // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
-    // requires stack alignment. We don't know for sure what the stack size
-    // will be, but for this, an estimate is good enough. If there anything
-    // changes it, it'll be a spill, which implies we've used all the registers
-    // and so R4 is already used, so not marking it here will be OK.
-    // FIXME: It will be better just to find spare register here.
+    // Spill the scratch register if a Thumb1 epilogue has to restore SP from
+    // FP or the function requires stack alignment. We don't know for sure
+    // what the stack size will be, but an estimate is good enough here. If
+    // anything changes it, it'll be a spill, which implies we've used all
+    // the registers and so the scratch register is already used, so not
+    // marking it here will be OK.
     if (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) ||
         MFI.estimateStackSize(MF) > 508)
-      SavedRegs.set(ARM::R4);
+      SavedRegs.set(ScratchReg);
   }
 
   // See if we can spill vector registers to aligned stack.
-  checkNumAlignedDPRCS2Regs(MF, SavedRegs);
+  checkNumAlignedDPRCS2Regs(MF, SavedRegs, ScratchReg);
 
   // Spill the BasePtr if it's used.
   if (RegInfo->hasBasePointer(MF))
@@ -2292,10 +2342,39 @@
     return;
   }
 
-  // Use R4 and R5 as scratch registers.
-  // We save R4 and R5 before use and restore them before leaving the function.
-  unsigned ScratchReg0 = ARM::R4;
-  unsigned ScratchReg1 = ARM::R5;
+  // We will use r4 to hold the stack limit and r5 to hold the stack size
+  // requested and arguments for __morestack().
+
+  // r4 and r5 might be used as global named registers. If that is the case,
+  // temporarily save their content in other registers so they're free to be
+  // used for __morestack.
+  unsigned BackupR4 = ARM::NoRegister;
+  unsigned BackupR5 = ARM::NoRegister;
+  BitVector UsedRegs = BitVector(ARM::GPRRegClass.getNumRegs());
+  // Make sure we won't get r4/r5 suggested as a backup register.
+  unsigned R4 = ARM::R4 - ARM::GPRRegClass.begin()[0];
+  unsigned R5 = ARM::R5 - ARM::GPRRegClass.begin()[0];
+  UsedRegs[R4] = true;
+  UsedRegs[R5] = true;
+
+  if (STI.isGPRegisterReserved(R4)) {
+    if (!STI.hasV6Ops())
+      // Do not allow r4 to be reserved if segmented stacks are used on a
+      // target where 'mov low_reg, low_reg' is not possible.
+      report_fatal_error("-ffixed-r4 is not allowed for this target when "
+                         "segmented stacks are in use.");
+    BackupR4 = getScratchRegister(MF, UsedRegs);
+    UsedRegs[BackupR4 - ARM::GPRRegClass.begin()[0]] = true;
+  }
+  if (STI.isGPRegisterReserved(R5)) {
+    if (!STI.hasV6Ops())
+      // Do not allow r5 to be reserved if segmented stacks are used on a
+      // target where 'mov low_reg, low_reg' is not possible.
+      report_fatal_error("-ffixed-r5 is not allowed for this target when "
+                         "segmented stacks are in use.");
+    BackupR5 = getScratchRegister(MF, UsedRegs);
+  }
+
   uint64_t AlignedStackSize;
 
   MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
@@ -2354,27 +2433,37 @@
   // boundary directly to the value of the stack pointer, per gcc.
   bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
 
-  // We will use two of the callee save registers as scratch registers so we
-  // need to save those registers onto the stack.
-  // We will use SR0 to hold stack limit and SR1 to hold the stack size
-  // requested and arguments for __morestack().
-  // SR0: Scratch Register #0
-  // SR1: Scratch Register #1
-  // push {SR0, SR1}
+  // We will use r4 and r5 either directly or use scratch registers to save
+  // their values, so we need to save those registers onto the stack.
+  // push {r4/sr4, r5/sr5}
+  unsigned SortedPushList[2] = {
+      std::min(BackupR4 ? BackupR4 : (unsigned)ARM::R4,
+               BackupR5 ? BackupR5 : (unsigned)ARM::R5),
+      std::max(BackupR5 ? BackupR5 : (unsigned)ARM::R5,
+               BackupR4 ? BackupR4 : (unsigned)ARM::R4)};
   if (Thumb) {
     BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
-        .addReg(ScratchReg0)
-        .addReg(ScratchReg1);
+        .addReg(SortedPushList[0])
+        .addReg(SortedPushList[1]);
   } else {
     BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
-        .addReg(ScratchReg0)
-        .addReg(ScratchReg1);
+        .addReg(SortedPushList[0])
+        .addReg(SortedPushList[1]);
   }
 
+  if (BackupR4)
+    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), BackupR4)
+        .addReg(ARM::R4)
+        .add(predOps(ARMCC::AL));
+  if (BackupR5)
+    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), BackupR5)
+        .addReg(ARM::R5)
+        .add(predOps(ARMCC::AL));
+
   // Emit the relevant DWARF information about the change in stack pointer as
   // well as where to find both r4 and r5 (the callee-save registers)
   CFIIndex =
@@ -2382,21 +2471,21 @@
   BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
   CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
-      nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
+      nullptr, MRI->getDwarfRegNum(ARM::R5, true), -4));
   BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
   CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
-      nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
+      nullptr, MRI->getDwarfRegNum(ARM::R4, true), -8));
   BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
 
   // mov SR1, sp
   if (Thumb) {
-    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
+    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ARM::R5)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));
   } else if (CompareStackPointer) {
-    BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
+    BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ARM::R5)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
@@ -2404,13 +2493,13 @@
 
   // sub SR1, sp, #StackSize
   if (!CompareStackPointer && Thumb) {
-    BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
+    BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ARM::R5)
        .add(condCodeOp())
-        .addReg(ScratchReg1)
+        .addReg(ARM::R5)
        .addImm(AlignedStackSize)
        .add(predOps(ARMCC::AL));
   } else if (!CompareStackPointer) {
-    BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
+    BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ARM::R5)
        .addReg(ARM::SP)
        .addImm(AlignedStackSize)
        .add(predOps(ARMCC::AL))
@@ -2424,20 +2513,20 @@
     MachineConstantPool *MCP = MF.getConstantPool();
     unsigned CPI = MCP->getConstantPoolIndex(NewCPV, 4);
 
-    // ldr SR0, [pc, offset(STACK_LIMIT)]
-    BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
+    // ldr r4, [pc, offset(STACK_LIMIT)]
+    BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ARM::R4)
        .addConstantPoolIndex(CPI)
        .add(predOps(ARMCC::AL));
 
-    // ldr SR0, [SR0]
-    BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
-        .addReg(ScratchReg0)
+    // ldr r4, [r4]
+    BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ARM::R4)
+        .addReg(ARM::R4)
        .addImm(0)
        .add(predOps(ARMCC::AL));
   } else {
     // Get TLS base address from the coprocessor
-    // mrc p15, #0, SR0, c13, c0, #3
-    BuildMI(McrMBB, DL, TII.get(ARM::MRC), ScratchReg0)
+    // mrc p15, #0, r4, c13, c0, #3
+    BuildMI(McrMBB, DL, TII.get(ARM::MRC), ARM::R4)
        .addImm(15)
        .addImm(0)
        .addImm(13)
@@ -2450,19 +2539,19 @@
     unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
 
     // Get the stack limit from the right offset
-    // ldr SR0, [sr0, #4 * TlsOffset]
-    BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
-        .addReg(ScratchReg0)
+    // ldr r4, [r4, #4 * TlsOffset]
+    BuildMI(GetMBB, DL, TII.get(ARM::LDRi12), ARM::R4)
+        .addReg(ARM::R4)
        .addImm(4 * TlsOffset)
        .add(predOps(ARMCC::AL));
   }
 
   // Compare stack limit with stack size requested.
-  // cmp SR0, SR1
+  // cmp r4, r5
   Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
   BuildMI(GetMBB, DL, TII.get(Opcode))
-      .addReg(ScratchReg0)
-      .addReg(ScratchReg1)
+      .addReg(ARM::R4)
+      .addReg(ARM::R5)
      .add(predOps(ARMCC::AL));
 
   // This jump is taken if StackLimit < SP - stack required.
@@ -2471,33 +2560,32 @@
      .addImm(ARMCC::LO)
      .addReg(ARM::CPSR);
 
-  // Calling __morestack(StackSize, Size of stack arguments).
-  // __morestack knows that the stack size requested is in SR0(r4)
-  // and amount size of stack arguments is in SR1(r5).
+  // __morestack knows that the stack size requested is in r4 and the size
+  // of the stack arguments is in r5.
 
-  // Pass first argument for the __morestack by Scratch Register #0.
+  // Pass the first argument to __morestack in r4.
   // The amount size of stack required
   if (Thumb) {
-    BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
+    BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ARM::R4)
        .add(condCodeOp())
        .addImm(AlignedStackSize)
        .add(predOps(ARMCC::AL));
   } else {
-    BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
+    BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ARM::R4)
        .addImm(AlignedStackSize)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
   }
 
-  // Pass second argument for the __morestack by Scratch Register #1.
-  // The amount size of stack consumed to save function arguments.
+  // Pass the second argument to __morestack in r5.
+  // The amount of stack consumed to save function arguments.
   if (Thumb) {
-    BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
+    BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ARM::R5)
        .add(condCodeOp())
        .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
        .add(predOps(ARMCC::AL));
   } else {
-    BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
+    BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ARM::R5)
        .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
@@ -2542,9 +2630,9 @@
   if (ST->isThumb1Only()) {
     BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
-        .addReg(ScratchReg0);
+        .addReg(ARM::R4);
     BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
-        .addReg(ScratchReg0)
+        .addReg(ARM::R4)
        .add(predOps(ARMCC::AL));
   } else {
     BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
@@ -2562,22 +2650,35 @@
        .addReg(ARM::LR);
   }
 
-  // Restore SR0 and SR1 in case of __morestack() was called.
+  // If r4 and r5 were backed up in other registers, restore them from there.
+  // mov r4, sr4
+  // mov r5, sr5
+  if (BackupR4)
+    BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::R4)
+        .addReg(BackupR4)
+        .add(predOps(ARMCC::AL));
+  if (BackupR5)
+    BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::R5)
+        .addReg(BackupR5)
+        .add(predOps(ARMCC::AL));
+
+  // Restore r4 and r5 (resp. their backup registers) in case __morestack()
+  // was called.
   // __morestack() will skip PostStackMBB block so we need to restore
   // scratch registers from here.
-  // pop {SR0, SR1}
+  // pop {r4/sr4, r5/sr5}
   if (Thumb) {
     BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
-        .addReg(ScratchReg0)
-        .addReg(ScratchReg1);
+        .addReg(SortedPushList[0])
+        .addReg(SortedPushList[1]);
   } else {
     BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
-        .addReg(ScratchReg0)
-        .addReg(ScratchReg1);
+        .addReg(SortedPushList[0])
+        .addReg(SortedPushList[1]);
   }
 
   // Update the CFA offset now that we've popped
@@ -2589,19 +2690,30 @@
   BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
 
-  // Restore SR0 and SR1 in case of __morestack() was not called.
-  // pop {SR0, SR1}
+  // Restore r4 and r5 (resp. their backup registers) in case __morestack()
+  // was not called.
+  // mov r4, sr4
+  // mov r5, sr5
+  // pop {r4/sr4, r5/sr5}
+  if (BackupR4)
+    BuildMI(PostStackMBB, DL, TII.get(ARM::tMOVr), ARM::R4)
+        .addReg(BackupR4)
+        .add(predOps(ARMCC::AL));
+  if (BackupR5)
+    BuildMI(PostStackMBB, DL, TII.get(ARM::tMOVr), ARM::R5)
+        .addReg(BackupR5)
+        .add(predOps(ARMCC::AL));
   if (Thumb) {
     BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
-        .addReg(ScratchReg0)
-        .addReg(ScratchReg1);
+        .addReg(SortedPushList[0])
+        .addReg(SortedPushList[1]);
   } else {
     BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
-        .addReg(ScratchReg0)
-        .addReg(ScratchReg1);
+        .addReg(SortedPushList[0])
+        .addReg(SortedPushList[1]);
   }
 
   // Update the CFA offset now that we've popped
@@ -2612,11 +2717,11 @@
   // Tell debuggers that r4 and r5 are now the same as they were in the
   // previous function, that they're the "Same Value".
   CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
-      nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
+      nullptr, MRI->getDwarfRegNum(ARM::R4, true)));
   BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
   CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
-      nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
+      nullptr, MRI->getDwarfRegNum(ARM::R5, true)));
   BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex);
 
Index: llvm/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.h
+++ llvm/lib/Target/ARM/ARMISelLowering.h
@@ -628,6 +628,8 @@
 
     bool preferIncOfAddToSubOfNot(EVT VT) const override;
 
+    unsigned getScratchRegister(unsigned FramePointer) const;
+
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -5572,6 +5572,8 @@
 Register ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                               const MachineFunction &MF) const {
   Register Reg = StringSwitch<Register>(RegName)
+                     .Case("r4", ARM::R4)
+                     .Case("r5", ARM::R5)
                      .Case("r6", ARM::R6)
                      .Case("r7", ARM::R7)
                      .Case("r8", ARM::R8)
@@ -10217,6 +10219,17 @@
          "__chkstk is only supported on Windows");
   assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode");
 
+  // r4 might be used as a global named register, which means its value has
+  // not been and must not be saved to the stack. Thus temporarily save its
+  // content in another register so r4 is free to be used for __chkstk.
+  unsigned BackupR4 = ARM::NoRegister;
+  if (Subtarget->isGPRegisterReserved(ARM::R4 - ARM::GPRRegClass.begin()[0])) {
+    BackupR4 = getScratchRegister(ARM::R11);
+    BuildMI(*MBB, MI, DL, TII.get(ARM::tMOVr), BackupR4)
+        .addReg(ARM::R4)
+        .add(predOps(ARMCC::AL));
+  }
+
   // __chkstk takes the number of words to allocate on the stack in R4, and
   // returns the stack adjustment in number of bytes in R4. This will not
   // clober any other registers (other than the obvious lr).
@@ -10277,6 +10291,13 @@
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
 
+  // If r4 is actually a fixed register, restore it.
+  if (BackupR4) {
+    BuildMI(*MBB, MI, DL, TII.get(ARM::tMOVr), ARM::R4)
+        .addReg(BackupR4)
+        .add(predOps(ARMCC::AL));
+  }
+
   MI.eraseFromParent();
   return MBB;
 }
@@ -17183,3 +17205,14 @@
   MF.getFrameInfo().computeMaxCallFrameSize(MF);
   TargetLoweringBase::finalizeLowering(MF);
 }
+
+unsigned ARMTargetLowering::getScratchRegister(unsigned FramePointer) const {
+  for (unsigned i = 4; i < ARM::GPRRegClass.getNumRegs() - 4; i++) {
+    unsigned Reg = ARM::GPRRegClass.getRegister(i);
+    // The register must not be reserved and must not be the frame pointer.
+    if (!Subtarget->isGPRegisterReserved(i) && Reg != FramePointer)
+      return Reg;
+  }
+  report_fatal_error("ran out of registers: Too many registers reserved");
+  return ARM::NoRegister;
+}
Index: llvm/lib/Target/ARM/ARMTargetTransformInfo.h
===================================================================
--- llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -76,10 +76,10 @@
       ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization,
       ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass,
       ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls,
-      ARM::FeatureExecuteOnly, ARM::FeatureReserveR6, ARM::FeatureReserveR7,
-      ARM::FeatureReserveR8, ARM::FeatureReserveR9, ARM::FeatureReserveR10,
-      ARM::FeatureReserveR11, ARM::FeatureNoMovt,
-      ARM::FeatureNoNegativeImmediates
+      ARM::FeatureExecuteOnly, ARM::FeatureReserveR4, ARM::FeatureReserveR5,
+      ARM::FeatureReserveR6, ARM::FeatureReserveR7, ARM::FeatureReserveR8,
+      ARM::FeatureReserveR9, ARM::FeatureReserveR10, ARM::FeatureReserveR11,
+      ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates
   };
 
   const ARMSubtarget *getST() const { return ST; }
Index: llvm/lib/Target/ARM/Thumb1FrameLowering.h
===================================================================
--- llvm/lib/Target/ARM/Thumb1FrameLowering.h
+++ llvm/lib/Target/ARM/Thumb1FrameLowering.h
@@ -36,6 +36,9 @@
 
   bool hasReservedCallFrame(const MachineFunction &MF) const override;
 
+  unsigned getScratchRegister(const MachineFunction &MF,
+                              BitVector inUse = BitVector()) const;
+
   MachineBasicBlock::iterator
   eliminateCallFramePseudoInstr(MachineFunction &MF,
                                 MachineBasicBlock &MBB,
Index: llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -63,6 +63,15 @@
   return !MFI.hasVarSizedObjects();
 }
 
+unsigned Thumb1FrameLowering::getScratchRegister(const MachineFunction &MF,
+                                                 BitVector InUse) const {
+  unsigned Reg = ARMFrameLowering::getScratchRegister(MF, InUse);
+  if (isARMLowRegister(Reg))
+    return Reg;
+  report_fatal_error("ran out of registers: Too many registers reserved");
+  return ARM::NoRegister;
+}
+
 static void emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI,
@@ -373,16 +382,9 @@
     // For a large stack frame, we might need a scratch register to store
     // the size of the frame.  We know all callee-save registers are free
    // at this point in the prologue, so pick one.
-    unsigned ScratchRegister = ARM::NoRegister;
-    for (auto &I : CSI) {
-      unsigned Reg = I.getReg();
-      if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
-        ScratchRegister = Reg;
-        break;
-      }
-    }
+    unsigned ScratchReg = getScratchRegister(MF);
     emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
-                                 ScratchRegister, MachineInstr::FrameSetup);
+                                 ScratchReg, MachineInstr::FrameSetup);
     if (!HasFP) {
       CFAOffset -= NumBytes;
       unsigned CFIIndex = MF.addFrameInst(
@@ -403,31 +405,34 @@
   if (RegInfo->needsStackRealignment(MF)) {
     const unsigned NrBitsToZero = countTrailingZeros(MFI.getMaxAlignment());
-    // Emit the following sequence, using R4 as a temporary, since we cannot use
-    // SP as a source or destination register for the shifts:
+    // Emit the following sequence, using our scratch register as a temporary,
+    // since we cannot use SP as a source or destination register for the
+    // shifts:
     // mov r4, sp
     // lsrs r4, r4, #NrBitsToZero
     // lsls r4, r4, #NrBitsToZero
     // mov sp, r4
-    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
-        .addReg(ARM::SP, RegState::Kill)
-        .add(predOps(ARMCC::AL));
+    // (e.g. ScratchReg = r4)
+    unsigned ScratchReg = getScratchRegister(MF);
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ScratchReg)
+        .addReg(ARM::SP, RegState::Kill)
+        .add(predOps(ARMCC::AL));
 
-    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4)
-        .addDef(ARM::CPSR)
-        .addReg(ARM::R4, RegState::Kill)
-        .addImm(NrBitsToZero)
-        .add(predOps(ARMCC::AL));
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ScratchReg)
+        .addDef(ARM::CPSR)
+        .addReg(ScratchReg, RegState::Kill)
+        .addImm(NrBitsToZero)
+        .add(predOps(ARMCC::AL));
 
-    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4)
-        .addDef(ARM::CPSR)
-        .addReg(ARM::R4, RegState::Kill)
-        .addImm(NrBitsToZero)
-        .add(predOps(ARMCC::AL));
+    BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ScratchReg)
+        .addDef(ARM::CPSR)
+        .addReg(ScratchReg, RegState::Kill)
+        .addImm(NrBitsToZero)
+        .add(predOps(ARMCC::AL));
 
     BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
-        .addReg(ARM::R4, RegState::Kill)
-        .add(predOps(ARMCC::AL));
+        .addReg(ScratchReg, RegState::Kill)
+        .add(predOps(ARMCC::AL));
 
     AFI->setShouldRestoreSPFromFP(true);
   }
@@ -511,13 +516,14 @@
     // Reset SP based on frame pointer only if the stack frame extends beyond
     // frame pointer stack slot, the target is ELF and the function has FP, or
     // the target uses var sized objects.
+    unsigned ScratchReg = Thumb1FrameLowering::getScratchRegister(MF);
     if (NumBytes) {
-      assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
+      assert(!MFI.getPristineRegs(MF).test(ScratchReg) &&
              "No scratch register to restore SP from FP!");
-      emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
-                                TII, *RegInfo);
+      emitThumbRegPlusImmediate(MBB, MBBI, dl, ScratchReg, FramePtr,
+                                -NumBytes, TII, *RegInfo);
       BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
-          .addReg(ARM::R4)
+          .addReg(ScratchReg)
          .add(predOps(ARMCC::AL));
     } else
       BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
@@ -527,24 +533,16 @@
     // For a large stack frame, we might need a scratch register to store
     // the size of the frame. We know all callee-save registers are free
    // at this point in the epilogue, so pick one.
-    unsigned ScratchRegister = ARM::NoRegister;
-    bool HasFP = hasFP(MF);
-    for (auto &I : MFI.getCalleeSavedInfo()) {
-      unsigned Reg = I.getReg();
-      if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) {
-        ScratchRegister = Reg;
-        break;
-      }
-    }
+    unsigned ScratchReg = getScratchRegister(MF);
 
     if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET &&
         &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) {
       MachineBasicBlock::iterator PMBBI = std::prev(MBBI);
       if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes))
         emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes,
-                                     ScratchRegister, MachineInstr::NoFlags);
+                                     ScratchReg, MachineInstr::NoFlags);
     } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
       emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes,
-                                   ScratchRegister, MachineInstr::NoFlags);
+                                   ScratchReg, MachineInstr::NoFlags);
   }
 }
Index: llvm/test/CodeGen/ARM/Windows/chkstk-fixed-r4.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/Windows/chkstk-fixed-r4.ll
@@ -0,0 +1,19 @@
+; RUN: llc -mtriple=thumbv7-windows -mcpu=cortex-a9 -filetype asm -mattr=+reserve-r4 %s -o - \
+; RUN:     | FileCheck %s
+
+define arm_aapcs_vfpcc void @check_r4usage() {
+entry:
+  %buffer = alloca [4096 x i8], align 1
+  ret void
+}
+
+; CHECK-NOT: push {{{.*}}r4{{.*}}}
+; CHECK: push {{{.*}}r5{{.*}}}
+; CHECK: mov r5, r4
+; CHECK: movw r4, #1024
+; CHECK: bl __chkstk
+; CHECK: sub.w sp, sp, r4
+; CHECK: mov r4, r5
+; CHECK: pop {{{.*}}r5{{.*}}}
+; CHECK-NOT: pop {{{.*}}r4{{.*}}}
+
Index: llvm/test/CodeGen/ARM/Windows/vla-fixed-r4.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/Windows/vla-fixed-r4.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -mattr=+reserve-r4 -filetype asm -o - %s \
+; RUN:     | FileCheck %s -check-prefix CHECK-SMALL-CODE
+; RUN: llc -mtriple=thumbv7-windows-itanium -mcpu=cortex-a9 -code-model=large -mattr=+reserve-r4 -filetype asm -o - %s \
+; RUN:     | FileCheck %s -check-prefix CHECK-LARGE-CODE
+; RUN: llc -mtriple=thumbv7-windows-msvc -mcpu=cortex-a9 -mattr=+reserve-r4 -filetype asm -o - %s \
+; RUN:     | FileCheck %s -check-prefix CHECK-SMALL-CODE
+
+define arm_aapcs_vfpcc i8 @function(i32 %sz, i32 %idx) {
+entry:
+  %vla = alloca i8, i32 %sz, align 1
+  %arrayidx = getelementptr inbounds i8, i8* %vla, i32 %idx
+  %0 = load volatile i8, i8* %arrayidx, align 1
+  ret i8 %0
+}
+
+; CHECK-SMALL-CODE: adds [[R4:r[0-9]+]], #7
+; CHECK-SMALL-CODE: bic [[R4]], [[R4]], #4
+; CHECK-SMALL-CODE: lsrs r4, [[R4]], #2
+; CHECK-SMALL-CODE: bl __chkstk
+; CHECK-SMALL-CODE: sub.w sp, sp, r4
+
+; CHECK-LARGE-CODE: adds [[R4:r[0-9]+]], #7
+; CHECK-LARGE-CODE: bic [[R4]], [[R4]], #4
+; CHECK-LARGE-CODE: lsrs r4, [[R4]], #2
+; CHECK-LARGE-CODE: movw [[IP:r[0-9]+]], :lower16:__chkstk
+; CHECK-LARGE-CODE: movt [[IP]], :upper16:__chkstk
+; CHECK-LARGE-CODE: blx [[IP]]
+; CHECK-LARGE-CODE: sub.w sp, sp, r4
Index: llvm/test/CodeGen/ARM/named-reg-alloc.ll
===================================================================
--- llvm/test/CodeGen/ARM/named-reg-alloc.ll
+++ llvm/test/CodeGen/ARM/named-reg-alloc.ll
@@ -4,11 +4,11 @@
 define i32 @get_stack() nounwind {
 entry:
 ; FIXME: Include an allocatable-specific error message
-; CHECK: Invalid register name "r5".
+; CHECK: Invalid register name "r3".
   %sp = call i32 @llvm.read_register.i32(metadata !0)
   ret i32 %sp
 }
 
 declare i32 @llvm.read_register.i32(metadata) nounwind
 
-!0 = !{!"r5\00"}
+!0 = !{!"r3\00"}
Index: llvm/test/CodeGen/ARM/reg-alloc-no-alignment.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/reg-alloc-no-alignment.ll
@@ -0,0 +1,17 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -verify-machineinstrs -mattr=+reserve-r10,+reserve-r9,+reserve-r8,+reserve-r4 \
+; RUN:     -asm-verbose=false | FileCheck %s
+
+
+declare dso_local i32 @bar(i32*)
+
+; Reserved registers should not be used to correct alignment.
+define hidden i32 @main() {
+; CHECK-NOT: r10
+; CHECK-NOT: r9
+; CHECK-NOT: r8
+; CHECK-NOT: r4
+; CHECK: {r5, r7, lr}
+  %a = alloca i32, i32 4, align 8
+  %1 = call i32 @bar (i32* %a)
+  ret i32 %1
+}
\ No newline at end of file
Index: llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r4-r5-r6.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r4-r5-r6.ll
@@ -0,0 +1,60 @@
+; RUN: llc < %s -mattr=+reserve-r4,+reserve-r5,+reserve-r6 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s
+;
+; Equivalent C source code
+; void bar(unsigned int i,
+;          unsigned int j,
+;          unsigned int k,
+;          unsigned int l,
+;          unsigned int m,
+;          unsigned int n,
+;          unsigned int o,
+;          unsigned int p)
+; {
+;   unsigned int result = i + j + k + l +m + n + o + p;
+; }
+
+define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind {
+entry:
+; CHECK-NOT: push {{{.*}}r4{{.*}}}
+; CHECK-NOT: push {{{.*}}r5, r6{{.*}}}
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %l.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  %o.addr = alloca i32, align 4
+  %p.addr = alloca i32, align 4
+  %result = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  store i32 %k, i32* %k.addr, align 4
+  store i32 %l, i32* %l.addr, align 4
+  store i32 %m, i32* %m.addr, align 4
+  store i32 %n, i32* %n.addr, align 4
+  store i32 %o, i32* %o.addr, align 4
+  store i32 %p, i32* %p.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
+  %1 = load i32, i32* %j.addr, align 4
+  %add = add i32 %0, %1
+  %2 = load i32, i32* %k.addr, align 4
+  %add1 = add i32 %add, %2
+  %3 = load i32, i32* %l.addr, align 4
+  %add2 = add i32 %add1, %3
+  %4 = load i32, i32* %m.addr, align 4
+  %add3 = add i32 %add2, %4
+  %5 = load i32, i32* %n.addr, align 4
+  %add4 = add i32 %add3, %5
+  %6 = load i32, i32* %o.addr, align 4
+  %add5 = add i32 %add4, %6
+  %7 = load i32, i32* %p.addr, align 4
+  %add6 = add i32 %add5, %7
+  store i32 %add6, i32* %result, align 4
+; CHECK-NOT: {{.*}}r4{{.*}}
+; CHECK-NOT: {{.*}}r5{{.*}}
+; CHECK-NOT: {{.*}}r6{{.*}}
+; CHECK: {{.*}}r7{{.*}}
+  ret void
+; CHECK-NOT: pop {{{.*}}r4{{.*}}}
+; CHECK-NOT: pop {{{.*}}r5, r6{{.*}}}
+}
Index: llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r5-r6.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r5-r6.ll
@@ -0,0 +1,58 @@
+; RUN: llc < %s -mattr=+reserve-r5,+reserve-r6 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s
+;
+; Equivalent C source code
+; void bar(unsigned int i,
+;          unsigned int j,
+;          unsigned int k,
+;          unsigned int l,
+;          unsigned int m,
+;          unsigned int n,
+;          unsigned int o,
+;          unsigned int p)
+; {
+;   unsigned int result = i + j + k + l +m + n + o + p;
+; }
+
+define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind {
+entry:
+; CHECK-NOT: push {{{.*}}r5, r6{{.*}}}
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %l.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  %o.addr = alloca i32, align 4
+  %p.addr = alloca i32, align 4
+  %result = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  store i32 %k, i32* %k.addr, align 4
+  store i32 %l, i32* %l.addr, align 4
+  store i32 %m, i32* %m.addr, align 4
+  store i32 %n, i32* %n.addr, align 4
+  store i32 %o, i32* %o.addr, align 4
+  store i32 %p, i32* %p.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
+  %1 = load i32, i32* %j.addr, align 4
+  %add = add i32 %0, %1
+  %2 = load i32, i32* %k.addr, align 4
+  %add1 = add i32 %add, %2
+  %3 = load i32, i32* %l.addr, align 4
+  %add2 = add i32 %add1, %3
+  %4 = load i32, i32* %m.addr, align 4
+  %add3 = add i32 %add2, %4
+  %5 = load i32, i32* %n.addr, align 4
+  %add4 = add i32 %add3, %5
+  %6 = load i32, i32* %o.addr, align 4
+  %add5 = add i32 %add4, %6
+  %7 = load i32, i32* %p.addr, align 4
+  %add6 = add i32 %add5, %7
+  store i32 %add6, i32* %result, align 4
+; CHECK: {{.*}}r4{{.*}}
+; CHECK-NOT: {{.*}}r5{{.*}}
+; CHECK-NOT: {{.*}}r6{{.*}}
+; CHECK: {{.*}}r7{{.*}}
+  ret void
+; CHECK-NOT: pop {{{.*}}r5, r6{{.*}}}
+}
Index: llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r5.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r5.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mattr=+reserve-r5 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s
+;
+; Equivalent C source code
+; void bar(unsigned int i,
+;          unsigned int j,
+;          unsigned int k,
+;          unsigned int l,
+;          unsigned int m,
+;          unsigned int n,
+;          unsigned int o,
+;          unsigned int p)
+; {
+;   unsigned int result = i + j + k + l +m + n + o + p;
+; }
+
+define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind {
+entry:
+; CHECK-NOT: push {{{.*}}r5,{{.*}}}
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %l.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  %o.addr = alloca i32, align 4
+  %p.addr = alloca i32, align 4
+  %result = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  store i32 %k, i32* %k.addr, align 4
+  store i32 %l, i32* %l.addr, align 4
+  store i32 %m, i32* %m.addr, align 4
+  store i32 %n, i32* %n.addr, align 4
+  store i32 %o, i32* %o.addr, align 4
+  store i32 %p, i32* %p.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
+  %1 = load i32, i32* %j.addr, align 4
+  %add = add i32 %0, %1
+  %2 = load i32, i32* %k.addr, align 4
+  %add1 = add i32 %add, %2
+  %3 = load i32, i32* %l.addr, align 4
+  %add2 = add i32 %add1, %3
+  %4 = load i32, i32* %m.addr, align 4
+  %add3 = add i32 %add2, %4
+  %5 = load i32, i32* %n.addr, align 4
+  %add4 = add i32 %add3, %5
+  %6 = load i32, i32* %o.addr, align 4
+  %add5 = add i32 %add4, %6
+  %7 = load i32, i32* %p.addr, align 4
+  %add6 = add i32 %add5, %7
+  store i32 %add6, i32* %result, align 4
+; CHECK: {{.*}}r4{{.*}}
+; CHECK-NOT: {{.*}}r5{{.*}}
+; CHECK: {{.*}}r6{{.*}}
+  ret void
+; CHECK-NOT: pop {{{.*}}r5,{{.*}}}
+}
Index: llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r4.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/reg-alloc-with-fixed-reg-r4.ll
@@ -0,0 +1,57 @@
+; RUN: llc < %s -mattr=+reserve-r4 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s
+;
+; Equivalent C source code
+; void bar(unsigned int i,
+;          unsigned int j,
+;          unsigned int k,
+;          unsigned int l,
+;          unsigned int m,
+;          unsigned int n,
+;          unsigned int o,
+;          unsigned int p)
+; {
+;   unsigned int result = i + j + k + l +m + n + o + p;
+; }
+
+define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind {
+entry:
+; CHECK-NOT: push {{{.*}}r4,{{.*}}}
+  %i.addr = alloca i32, align 4
+  %j.addr = alloca i32, align 4
+  %k.addr = alloca i32, align 4
+  %l.addr = alloca i32, align 4
+  %m.addr = alloca i32, align 4
+  %n.addr = alloca i32, align 4
+  %o.addr = alloca i32, align 4
+  %p.addr = alloca i32, align 4
+  %result = alloca i32, align 4
+  store i32 %i, i32* %i.addr, align 4
+  store i32 %j, i32* %j.addr, align 4
+  store i32 %k, i32* %k.addr, align 4
+  store i32 %l, i32* %l.addr, align 4
+  store i32 %m, i32* %m.addr, align 4
+  store i32 %n, i32* %n.addr, align 4
+  store i32 %o, i32* %o.addr, align 4
+  store i32 %p, i32* %p.addr, align 4
+  %0 = load i32, i32* %i.addr, align 4
+  %1 = load i32, i32* %j.addr, align 4
+  %add = add i32 %0, %1
+  %2 = load i32, i32* %k.addr, align 4
+  %add1 = add i32 %add, %2
+  %3 = load i32, i32* %l.addr, align 4
+  %add2 = add i32 %add1, %3
+  %4 = load i32, i32* %m.addr, align 4
+  %add3 = add i32 %add2, %4
+  %5 = load i32, i32* %n.addr, align 4
+  %add4 = add i32 %add3, %5
+  %6 = load i32, i32* %o.addr, align 4
+  %add5 = add i32 %add4, %6
+  %7 = load i32, i32* %p.addr, align 4
+  %add6 = add i32 %add5, %7
+  store i32 %add6, i32* %result, align 4
+; CHECK: {{.*}}r5{{.*}}
+; CHECK-NOT: {{.*}}r4{{.*}}
+; CHECK: {{.*}}r6{{.*}}
+  ret void
+; CHECK-NOT: pop {{{.*}}r4,{{.*}}}
+}
Index: llvm/test/CodeGen/ARM/segmented-stacks-fixed-r4-r5.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/segmented-stacks-fixed-r4-r5.ll
@@ -0,0 +1,60 @@
+; RUN: not llc < %s -mtriple=arm-linux-androideabi -mattr=+v4t -mattr=+reserve-r4 2>&1 | FileCheck %s -check-prefix=ARM-android-ERROR-r4
+; RUN: not llc < %s -mtriple=arm-linux-androideabi -mattr=+v4t -mattr=+reserve-r5 2>&1 | FileCheck %s -check-prefix=ARM-android-ERROR-r5
+; RUN: not llc < %s -mtriple=arm-linux-unknown-gnueabi -mattr=+v4t -mattr=+reserve-r4 2>&1 | FileCheck %s -check-prefix=ARM-linux-ERROR
+
+; ARM-android-ERROR-r4: -ffixed-r4 is not allowed for this target when segmented stacks are in use.
+; ARM-android-ERROR-r5: -ffixed-r5 is not allowed for this target when segmented stacks are in use.
+; ARM-linux-ERROR: -ffixed-r4 is not allowed for this target when segmented stacks are in use.
+
+; RUN: llc < %s -mtriple=armv7--linux-gnueabihf -mattr=+reserve-r4 -verify-machineinstrs | FileCheck %s -check-prefix=r4-ARM-linux
+; RUN: llc < %s -mtriple=armv7--linux-gnueabihf -mattr=+reserve-r4 -mattr=+reserve-r5 -verify-machineinstrs | FileCheck %s -check-prefix=r4-r5-ARM-linux
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
+
+define void @test_basic() #0 {
+  %mem = alloca i32, i32 10
+  call void @dummy_use (i32* %mem, i32 10)
+  ret void
+
+; r4-ARM-linux: push {r5, r6}
+; r4-ARM-linux-NEXT: mov r6, r4
+; r4-ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3
+; r4-ARM-linux-NEXT: mov r5, sp
+; r4-ARM-linux-NEXT: ldr r4, [r4, #4]
+; r4-ARM-linux-NEXT: cmp r4, r5
+; r4-ARM-linux-NEXT: blo .LBB0_2
+
+; r4-ARM-linux: mov r4, #48
+; r4-ARM-linux-NEXT: mov r5, #0
+; r4-ARM-linux-NEXT: stmdb sp!, {lr}
+; r4-ARM-linux-NEXT: bl __morestack
+; r4-ARM-linux-NEXT: ldm sp!, {lr}
+; r4-ARM-linux-NEXT: mov r4, r6
+; r4-ARM-linux-NEXT: pop {r5, r6}
+; r4-ARM-linux-NEXT: bx lr
+
+
+
+; r4-r5-ARM-linux: push {r6, r7}
+; r4-r5-ARM-linux-NEXT: mov r6, r4
+; r4-r5-ARM-linux-NEXT: mrc p15, #0, r4, c13, c0, #3
+; r4-r5-ARM-linux-NEXT: mov r7, r5
+; r4-r5-ARM-linux-NEXT: mov r5, sp
+; r4-r5-ARM-linux-NEXT: ldr r4, [r4, #4]
+; r4-r5-ARM-linux-NEXT: cmp r4, r5
+; r4-r5-ARM-linux-NEXT: blo .LBB0_2
+
+; r4-r5-ARM-linux: mov r4, #48
+; r4-r5-ARM-linux-NEXT: mov r5, #0
+; r4-r5-ARM-linux-NEXT: stmdb sp!, {lr}
+; r4-r5-ARM-linux-NEXT: bl __morestack
+; r4-r5-ARM-linux-NEXT: ldm sp!, {lr}
+; r4-r5-ARM-linux-NEXT: mov r4, r6
+; r4-r5-ARM-linux-NEXT: mov r5, r7
+; r4-r5-ARM-linux-NEXT: pop {r6, r7}
+; r4-r5-ARM-linux-NEXT: bx lr
+
+}
+
+attributes #0 = { "split-stack" }
Index: llvm/test/CodeGen/Thumb/callee_save_reserved.ll
===================================================================
--- llvm/test/CodeGen/Thumb/callee_save_reserved.ll
+++ llvm/test/CodeGen/Thumb/callee_save_reserved.ll
@@ -1,10 +1,10 @@
-; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r8 \
+; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs -frame-pointer=none -mattr=+reserve-r6,+reserve-r5 \
 ; RUN:     -asm-verbose=false | FileCheck --check-prefix=CHECK-INVALID %s
 
 ; Reserved low registers should not be used to correct reg deficit.
 define <4 x i32> @four_high_four_return_reserved() {
 entry:
-  ; CHECK-INVALID-NOT: r{{6|8}}
+  ; CHECK-INVALID-NOT: r{{[5-6]+}}
   tail call void asm sideeffect "", "~{r8},~{r9}"()
   %vecinit = insertelement <4 x i32> undef, i32 1, i32 0
   %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1
Index: llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
===================================================================
--- llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
+++ llvm/test/CodeGen/Thumb/emergency-spill-slot.ll
@@ -10,8 +10,8 @@
 ; CHECK-NEXT:    .setfp r7, sp, #12
 ; CHECK-NEXT:    add r7, sp, #12
 ; CHECK-NEXT:    .pad #4100
-; CHECK-NEXT:    ldr r6, .LCPI0_0
-; CHECK-NEXT:    add sp, r6
+; CHECK-NEXT:    ldr r4, .LCPI0_0
+; CHECK-NEXT:    add sp, r4
 ; CHECK-NEXT:    mov r6, sp
 ; CHECK-NEXT:    adds r0, r0, #7
 ; CHECK-NEXT:    movs r1, #7
@@ -60,8 +60,8 @@
 ; CHECK-NEXT:    .save {r4, r5, r6, r7, lr}
 ; CHECK-NEXT:    push {r4, r5, r6, r7, lr}
 ; CHECK-NEXT:    .pad #8196
-; CHECK-NEXT:    ldr r7, .LCPI1_0
-; CHECK-NEXT:    add sp, r7
+; CHECK-NEXT:    ldr r4, .LCPI1_0
+; CHECK-NEXT:    add sp, r4
 ; CHECK-NEXT:    add r0, sp, #4
 ; CHECK-NEXT:    ldr r1, .LCPI1_2
 ; CHECK-NEXT:    add r1, sp
@@ -74,8 +74,8 @@
 ; CHECK-NEXT:    ldr r0, [sp]
 ; CHECK-NEXT:    @APP
 ; CHECK-NEXT:    @NO_APP
-; CHECK-NEXT:    ldr r7, .LCPI1_1
-; CHECK-NEXT:    add sp, r7
+; CHECK-NEXT:    ldr r4, .LCPI1_1
+; CHECK-NEXT:    add sp, r4
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
@@ -120,8 +120,8 @@
 ; CHECK-NEXT:    .save {r4, r5, r6, lr}
 ; CHECK-NEXT:    push {r4, r5, r6, lr}
 ; CHECK-NEXT:    .pad #8196
-; CHECK-NEXT:    ldr r6, .LCPI2_0
-; CHECK-NEXT:    add sp, r6
+; CHECK-NEXT:    ldr r4, .LCPI2_0
+; CHECK-NEXT:    add sp, r4
 ; CHECK-NEXT:    add r0, sp, #4
 ; CHECK-NEXT:    ldr r1, .LCPI2_2
 ; CHECK-NEXT:    add r1, sp
@@ -134,8 +134,8 @@
 ; CHECK-NEXT:    ldr r7, [sp]
 ; CHECK-NEXT:    @APP
 ; CHECK-NEXT:    @NO_APP
-; CHECK-NEXT:    ldr r6, .LCPI2_1
-; CHECK-NEXT:    add sp, r6
+; CHECK-NEXT:    ldr r4, .LCPI2_1
+; CHECK-NEXT:    add sp, r4
 ; CHECK-NEXT:    pop {r4, r5, r6, pc}
 ; CHECK-NEXT:    .p2align 2
 ; CHECK-NEXT:  @ %bb.1:
Index: llvm/test/CodeGen/Thumb2/segmented-stacks-fixed-r4.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/segmented-stacks-fixed-r4.ll
@@ -0,0 +1,36 @@
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -mcpu=arm1156t2-s -mattr=+thumb2 -mattr=+reserve-r4 -verify-machineinstrs | FileCheck %s -check-prefix=Thumb-android
+; RUN: llc < %s -mtriple=thumb-linux-androideabi -mcpu=arm1156t2-s -mattr=+thumb2 -mattr=+reserve-r4 -filetype=obj
+
+
+; Just to prevent the alloca from being optimized away
+declare void @dummy_use(i32*, i32)
+
+define void @test_basic() #0 {
+  %mem = alloca i32, i32 10
+  call void @dummy_use (i32* %mem, i32 10)
+  ret void
+
+; Thumb-android: test_basic:
+
+; Thumb-android: push {r5, r6}
+; Thumb-android-NEXT: mov r6, r4
+; Thumb-android-NEXT: mrc p15, #0, r4, c13, c0, #3
+; Thumb-android-NEXT: mov r5, sp
+; Thumb-android-NEXT: ldr r4, [r4, #252]
+; Thumb-android-NEXT: cmp r4, r5
+; Thumb-android-NEXT: blo .LBB0_2
+
+; Thumb-android: mov r4, #48
+; Thumb-android-NEXT: mov r5, #0
+; Thumb-android-NEXT: push {lr}
+; Thumb-android-NEXT: bl __morestack
+; Thumb-android-NEXT: ldr lr, [sp], #4
+; Thumb-android-NEXT: mov r4, r6
+; Thumb-android-NEXT: pop {r5, r6}
+; Thumb-android-NEXT: bx lr
+
+; Thumb-android: pop {r5, r6}
+
+}
+
+attributes #0 = { "split-stack" }
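
---

Note for reviewers: the end-to-end use case motivating this patch is pinning a global
named register to r4/r5 (the code comments above refer to this as "r4 might be used as
a global named register"). The following minimal sketch is illustrative only -- the
file name, variable name, and exact clang invocation are assumptions, not part of the
patch. A GNU C global register variable is only sound if compiler-generated code never
allocates or clobbers that register, which is exactly the guarantee that
-ffixed-r4 (mapped by the driver change above to the +reserve-r4 subtarget feature)
provides:

    /* counter.c - build with something like:
     *   clang --target=armv7a-linux-gnueabi -ffixed-r4 -O2 -c counter.c
     * The variable lives in r4 for the whole program; reserving r4 keeps
     * regular codegen, including the prologue/epilogue scratch-register
     * logic patched above, from touching it. */
    register unsigned long event_count asm("r4");

    unsigned long bump(unsigned long n) {
      event_count += n;  /* compiles to arithmetic directly on r4 */
      return event_count;
    }

From LLVM IR, the reserved register can likewise be read through the named-register
intrinsic exercised by the tests above, e.g.
call i32 @llvm.read_register.i32(metadata !{!"r4\00"}).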