Index: lib/Target/ARM/ARM.td =================================================================== --- lib/Target/ARM/ARM.td +++ lib/Target/ARM/ARM.td @@ -335,9 +335,11 @@ "Enable the generation of " "execute only code.">; -def FeatureReserveR9 : SubtargetFeature<"reserve-r9", "ReserveR9", "true", - "Reserve R9, making it unavailable" - " as GPR">; +foreach i = {5-11} in + def FeatureReserveR#i : SubtargetFeature<"reserve-r"#i, + "ReserveRRegister["#i#"]", "true", + "Reserve R"#i#", making it " + "unavailable as a GPR">; def FeatureNoMovt : SubtargetFeature<"no-movt", "NoMovt", "true", "Don't use movt/movw pairs for " Index: lib/Target/ARM/ARMAsmPrinter.cpp =================================================================== --- lib/Target/ARM/ARMAsmPrinter.cpp +++ lib/Target/ARM/ARMAsmPrinter.cpp @@ -763,7 +763,7 @@ if (STI.isRWPI()) ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsSB); - else if (STI.isR9Reserved()) + else if (STI.isRRegisterReserved(9)) ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9Reserved); else Index: lib/Target/ARM/ARMBaseRegisterInfo.cpp =================================================================== --- lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -190,9 +190,11 @@ markSuperRegs(Reserved, getFramePointerReg(STI)); if (hasBasePointer(MF)) markSuperRegs(Reserved, BasePtr); - // Some targets reserve R9. - if (STI.isR9Reserved()) - markSuperRegs(Reserved, ARM::R9); + for (size_t R = 0; R < ARM::GPRRegClass.getNumRegs(); ++R) { + if (STI.isRRegisterReserved(R)) { + markSuperRegs(Reserved, ARM::R0 + R); + } + } // Reserve D16-D31 if the subtarget doesn't support them. if (!STI.hasVFP3() || STI.hasD16()) { static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!"); @@ -268,7 +270,7 @@ case ARM::GPRRegClassID: { bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed() ? TFI->hasFP(MF) : true; - return 10 - HasFP - (STI.isR9Reserved() ? 
1 : 0); + return 10 - HasFP - STI.getNumRRegisterReserved(); } case ARM::SPRRegClassID: // Currently not used as 'rep' register class. case ARM::DPRRegClassID: Index: lib/Target/ARM/ARMFrameLowering.cpp =================================================================== --- lib/Target/ARM/ARMFrameLowering.cpp +++ lib/Target/ARM/ARMFrameLowering.cpp @@ -1662,6 +1662,21 @@ const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; + if (STI.isRWPI() && Reg == ARM::R9) { + // Paranoid check for use of R9 with RWPI. Clobbering R9 with -frwpi will + // emit warnings about undefined behaviour but maybe there's a valid use + // case so on that basis allow it to be pushed/popped in the + // prologue/epilogue. + } else if (Reg > ARM::R0 && ARM::GPRRegClass.contains(Reg) && + STI.isRRegisterReserved(Reg - ARM::R0)) { + LLVM_DEBUG(dbgs() + << printReg(Reg, TRI) + << " has been reserved and" + << " should not be allocatable" + << " or spillable.\n"); + SavedRegs.reset(Reg); + continue; + } bool Spilled = false; if (SavedRegs.test(Reg)) { Spilled = true; @@ -1850,7 +1865,7 @@ LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is saved low register, RegDeficit = " << RegDeficit << "\n"); - } else { + } else if (!STI.isRRegisterReserved(Reg - ARM::R0)) { AvailableRegs.push_back(Reg); LLVM_DEBUG( dbgs() @@ -1865,7 +1880,7 @@ --RegDeficit; LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = " << RegDeficit << "\n"); - } else { + } else if (!STI.isRRegisterReserved(7)) { AvailableRegs.push_back(ARM::R7); LLVM_DEBUG( dbgs() Index: lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- lib/Target/ARM/ARMISelLowering.cpp +++ lib/Target/ARM/ARMISelLowering.cpp @@ -5063,9 +5063,16 @@ unsigned ARMTargetLowering::getRegisterByName(const char* RegName, EVT VT, SelectionDAG &DAG) const { unsigned Reg = StringSwitch(RegName) + .Case("r5", ARM::R5) + .Case("r6", ARM::R6) + 
.Case("r7", ARM::R7) + .Case("r8", ARM::R8) + .Case("r9", ARM::R9) + .Case("r10", ARM::R10) + .Case("r11", ARM::R11) .Case("sp", ARM::SP) - .Default(0); - if (Reg) + .Default(ARM::NoRegister); + if (Reg != ARM::NoRegister) return Reg; report_fatal_error(Twine("Invalid register name \"" + StringRef(RegName) + "\".")); Index: lib/Target/ARM/ARMSubtarget.h =================================================================== --- lib/Target/ARM/ARMSubtarget.h +++ lib/Target/ARM/ARMSubtarget.h @@ -211,8 +211,8 @@ /// NoARM - True if subtarget does not support ARM mode execution. bool NoARM = false; - /// ReserveR9 - True if R9 is not available as a general purpose register. - bool ReserveR9 = false; + /// ReserveRRegister[i] - R#i is not available as a general purpose register. + BitVector ReserveRRegister; /// NoMovt - True if MOVT / MOVW pairs are not used for materialization of /// 32-bit imms (including global addresses). @@ -718,9 +718,20 @@ bool isAClass() const { return ARMProcClass == AClass; } bool isReadTPHard() const { return ReadTPHard; } - bool isR9Reserved() const { - return isTargetMachO() ? 
(ReserveR9 || !HasV6Ops) : ReserveR9; + bool isRRegisterReserved(size_t i) const { + if (i == 9 && isTargetMachO() && !HasV6Ops) { + return true; + } + return ReserveRRegister[i]; } + unsigned getNumRRegisterReserved() const { + unsigned result = ReserveRRegister.count(); + if (!ReserveRRegister[9] && isRRegisterReserved(9)) { + ++result; + } + return result; + } + bool useR7AsFramePointer() const { return isTargetDarwin() || (!isTargetWindows() && isThumb()); Index: lib/Target/ARM/ARMSubtarget.cpp =================================================================== --- lib/Target/ARM/ARMSubtarget.cpp +++ lib/Target/ARM/ARMSubtarget.cpp @@ -94,7 +94,9 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const ARMBaseTargetMachine &TM, bool IsLittle) - : ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps), + : ARMGenSubtargetInfo(TT, CPU, FS), + UseMulOps(UseFusedMulOps), + ReserveRRegister(ARM::GPRRegClass.getNumRegs()), CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options), TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)), // At this point initializeSubtargetDependencies has been called so @@ -250,7 +252,7 @@ UseNEONForSinglePrecisionFP = true; if (isRWPI()) - ReserveR9 = true; + ReserveRRegister.set(9); // FIXME: Teach TableGen to deal with these instead of doing it manually here. 
switch (ARMProcFamily) { Index: lib/Target/ARM/ARMTargetTransformInfo.h =================================================================== --- lib/Target/ARM/ARMTargetTransformInfo.h +++ lib/Target/ARM/ARMTargetTransformInfo.h @@ -77,7 +77,9 @@ ARM::FeatureDSP, ARM::FeatureMP, ARM::FeatureVirtualization, ARM::FeatureMClass, ARM::FeatureRClass, ARM::FeatureAClass, ARM::FeatureNaClTrap, ARM::FeatureStrictAlign, ARM::FeatureLongCalls, - ARM::FeatureExecuteOnly, ARM::FeatureReserveR9, ARM::FeatureNoMovt, + ARM::FeatureExecuteOnly, ARM::FeatureReserveR5, ARM::FeatureReserveR6, + ARM::FeatureReserveR7, ARM::FeatureReserveR8, ARM::FeatureReserveR9, + ARM::FeatureReserveR10, ARM::FeatureReserveR11, ARM::FeatureNoMovt, ARM::FeatureNoNegativeImmediates }; Index: test/CodeGen/ARM/named-reg-alloc.ll =================================================================== --- test/CodeGen/ARM/named-reg-alloc.ll +++ test/CodeGen/ARM/named-reg-alloc.ll @@ -4,11 +4,11 @@ define i32 @get_stack() nounwind { entry: ; FIXME: Include an allocatable-specific error message -; CHECK: Invalid register name "r5". +; CHECK: Invalid register name "r3". 
%sp = call i32 @llvm.read_register.i32(metadata !0) ret i32 %sp } declare i32 @llvm.read_register.i32(metadata) nounwind -!0 = !{!"r5\00"} +!0 = !{!"r3\00"} Index: test/CodeGen/ARM/reg-alloc-with-fixed-reg-r5-r6.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/reg-alloc-with-fixed-reg-r5-r6.ll @@ -0,0 +1,58 @@ +; RUN: llc < %s -mattr=+reserve-r5,+reserve-r6 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s +; +; Equivalent C source code +; void bar(unsigned int i, +; unsigned int j, +; unsigned int k, +; unsigned int l, +; unsigned int m, +; unsigned int n, +; unsigned int o, +; unsigned int p) +; { +; unsigned int result = i + j + k + l +m + n + o + p; +; } + +define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { +entry: +; CHECK-NOT: push {{{.*}}r5, r6{{.*}}} + %i.addr = alloca i32, align 4 + %j.addr = alloca i32, align 4 + %k.addr = alloca i32, align 4 + %l.addr = alloca i32, align 4 + %m.addr = alloca i32, align 4 + %n.addr = alloca i32, align 4 + %o.addr = alloca i32, align 4 + %p.addr = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 %j, i32* %j.addr, align 4 + store i32 %k, i32* %k.addr, align 4 + store i32 %l, i32* %l.addr, align 4 + store i32 %m, i32* %m.addr, align 4 + store i32 %n, i32* %n.addr, align 4 + store i32 %o, i32* %o.addr, align 4 + store i32 %p, i32* %p.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = load i32, i32* %j.addr, align 4 + %add = add i32 %0, %1 + %2 = load i32, i32* %k.addr, align 4 + %add1 = add i32 %add, %2 + %3 = load i32, i32* %l.addr, align 4 + %add2 = add i32 %add1, %3 + %4 = load i32, i32* %m.addr, align 4 + %add3 = add i32 %add2, %4 + %5 = load i32, i32* %n.addr, align 4 + %add4 = add i32 %add3, %5 + %6 = load i32, i32* %o.addr, align 4 + %add5 = add i32 %add4, %6 + %7 = load i32, i32* %p.addr, align 4 + %add6 = add i32 %add5, %7 + 
store i32 %add6, i32* %result, align 4 +; CHECK: {{.*}}r4{{.*}} +; CHECK-NOT: {{.*}}r5{{.*}} +; CHECK-NOT: {{.*}}r6{{.*}} +; CHECK: {{.*}}r7{{.*}} + ret void +; CHECK-NOT: pop {{{.*}}r5, r6{{.*}}} +} Index: test/CodeGen/ARM/reg-alloc-with-fixed-reg-r5.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/reg-alloc-with-fixed-reg-r5.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -mattr=+reserve-r5 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s +; +; Equivalent C source code +; void bar(unsigned int i, +; unsigned int j, +; unsigned int k, +; unsigned int l, +; unsigned int m, +; unsigned int n, +; unsigned int o, +; unsigned int p) +; { +; unsigned int result = i + j + k + l +m + n + o + p; +; } + +define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { +entry: +; CHECK-NOT: push {{{.*}}r5,{{.*}}} + %i.addr = alloca i32, align 4 + %j.addr = alloca i32, align 4 + %k.addr = alloca i32, align 4 + %l.addr = alloca i32, align 4 + %m.addr = alloca i32, align 4 + %n.addr = alloca i32, align 4 + %o.addr = alloca i32, align 4 + %p.addr = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 %j, i32* %j.addr, align 4 + store i32 %k, i32* %k.addr, align 4 + store i32 %l, i32* %l.addr, align 4 + store i32 %m, i32* %m.addr, align 4 + store i32 %n, i32* %n.addr, align 4 + store i32 %o, i32* %o.addr, align 4 + store i32 %p, i32* %p.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = load i32, i32* %j.addr, align 4 + %add = add i32 %0, %1 + %2 = load i32, i32* %k.addr, align 4 + %add1 = add i32 %add, %2 + %3 = load i32, i32* %l.addr, align 4 + %add2 = add i32 %add1, %3 + %4 = load i32, i32* %m.addr, align 4 + %add3 = add i32 %add2, %4 + %5 = load i32, i32* %n.addr, align 4 + %add4 = add i32 %add3, %5 + %6 = load i32, i32* %o.addr, align 4 + %add5 = add i32 %add4, %6 + %7 = load i32, i32* %p.addr, align 4 + %add6 = 
add i32 %add5, %7 + store i32 %add6, i32* %result, align 4 +; CHECK: {{.*}}r4{{.*}} +; CHECK-NOT: {{.*}}r5{{.*}} +; CHECK: {{.*}}r6{{.*}} + ret void +; CHECK-NOT: pop {{{.*}}r5,{{.*}}} +} Index: test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6-modified.ll @@ -0,0 +1,66 @@ +; RUN: llc < %s -mattr=+reserve-r6 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s +; +; Equivalent C source code +; register unsigned r6 asm("r6"); +; void bar(unsigned int i, +; unsigned int j, +; unsigned int k, +; unsigned int l, +; unsigned int m, +; unsigned int n, +; unsigned int o, +; unsigned int p) +; { +; r6 = 10; +; unsigned int result = i + j + k + l +m + n + o + p; +; } +declare void @llvm.write_register.i32(metadata, i32) nounwind + +define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { +entry: +; CHECK-NOT: push {{{.*}}r6,{{.*}}} + %i.addr = alloca i32, align 4 + %j.addr = alloca i32, align 4 + %k.addr = alloca i32, align 4 + %l.addr = alloca i32, align 4 + %m.addr = alloca i32, align 4 + %n.addr = alloca i32, align 4 + %o.addr = alloca i32, align 4 + %p.addr = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 %j, i32* %j.addr, align 4 + store i32 %k, i32* %k.addr, align 4 + store i32 %l, i32* %l.addr, align 4 + store i32 %m, i32* %m.addr, align 4 + store i32 %n, i32* %n.addr, align 4 + store i32 %o, i32* %o.addr, align 4 + store i32 %p, i32* %p.addr, align 4 + ; r6 = 10 + call void @llvm.write_register.i32(metadata !0, i32 10) +; CHECK: {{.*}}mov{{.*}}r6,{{.*}} + %0 = load i32, i32* %i.addr, align 4 + %1 = load i32, i32* %j.addr, align 4 + %add = add i32 %0, %1 + %2 = load i32, i32* %k.addr, align 4 + %add1 = add i32 %add, %2 + %3 = load i32, i32* %l.addr, align 4 + %add2 = add i32 %add1, %3 + %4 = load i32, i32* %m.addr, 
align 4 + %add3 = add i32 %add2, %4 + %5 = load i32, i32* %n.addr, align 4 + %add4 = add i32 %add3, %5 + %6 = load i32, i32* %o.addr, align 4 + %add5 = add i32 %add4, %6 + %7 = load i32, i32* %p.addr, align 4 + %add6 = add i32 %add5, %7 + store i32 %add6, i32* %result, align 4 +; CHECK: {{.*}}r5{{.*}} +; CHECK-NOT: {{.*}}r6{{.*}} +; CHECK: {{.*}}r7{{.*}} + ret void +; CHECK-NOT: pop {{{.*}}r6,{{.*}}} +} + +!llvm.named.register.r6 = !{!0} +!0 = !{!"r6"} Index: test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/reg-alloc-with-fixed-reg-r6.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -mattr=+reserve-r6 -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s +; +; Equivalent C source code +; void bar(unsigned int i, +; unsigned int j, +; unsigned int k, +; unsigned int l, +; unsigned int m, +; unsigned int n, +; unsigned int o, +; unsigned int p) +; { +; unsigned int result = i + j + k + l +m + n + o + p; +; } + +define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { +entry: +; CHECK-NOT: push {{{.*}}r6,{{.*}}} + %i.addr = alloca i32, align 4 + %j.addr = alloca i32, align 4 + %k.addr = alloca i32, align 4 + %l.addr = alloca i32, align 4 + %m.addr = alloca i32, align 4 + %n.addr = alloca i32, align 4 + %o.addr = alloca i32, align 4 + %p.addr = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 %j, i32* %j.addr, align 4 + store i32 %k, i32* %k.addr, align 4 + store i32 %l, i32* %l.addr, align 4 + store i32 %m, i32* %m.addr, align 4 + store i32 %n, i32* %n.addr, align 4 + store i32 %o, i32* %o.addr, align 4 + store i32 %p, i32* %p.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = load i32, i32* %j.addr, align 4 + %add = add i32 %0, %1 + %2 = load i32, i32* %k.addr, align 4 + %add1 = add i32 %add, %2 + %3 = load i32, i32* %l.addr, align 4 + %add2 = add i32 
%add1, %3 + %4 = load i32, i32* %m.addr, align 4 + %add3 = add i32 %add2, %4 + %5 = load i32, i32* %n.addr, align 4 + %add4 = add i32 %add3, %5 + %6 = load i32, i32* %o.addr, align 4 + %add5 = add i32 %add4, %6 + %7 = load i32, i32* %p.addr, align 4 + %add6 = add i32 %add5, %7 + store i32 %add6, i32* %result, align 4 +; CHECK: {{.*}}r5{{.*}} +; CHECK-NOT: {{.*}}r6{{.*}} +; CHECK: {{.*}}r7{{.*}} + ret void +; CHECK-NOT: pop {{{.*}}r6,{{.*}}} +} Index: test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/reg-alloc-wout-fixed-regs.ll @@ -0,0 +1,57 @@ +; RUN: llc < %s -mtriple=arm-linux-gnueabi -O0 -filetype=asm --regalloc=fast 2>&1 | FileCheck %s +; +; Equivalent C source code +; void bar(unsigned int i, +; unsigned int j, +; unsigned int k, +; unsigned int l, +; unsigned int m, +; unsigned int n, +; unsigned int o, +; unsigned int p) +; { +; unsigned int result = i + j + k + l +m + n + o + p; +; } + +define void @bar(i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p) nounwind { +entry: +; CHECK: push {{{.*}}r4, r5{{.*}}} + %i.addr = alloca i32, align 4 + %j.addr = alloca i32, align 4 + %k.addr = alloca i32, align 4 + %l.addr = alloca i32, align 4 + %m.addr = alloca i32, align 4 + %n.addr = alloca i32, align 4 + %o.addr = alloca i32, align 4 + %p.addr = alloca i32, align 4 + %result = alloca i32, align 4 + store i32 %i, i32* %i.addr, align 4 + store i32 %j, i32* %j.addr, align 4 + store i32 %k, i32* %k.addr, align 4 + store i32 %l, i32* %l.addr, align 4 + store i32 %m, i32* %m.addr, align 4 + store i32 %n, i32* %n.addr, align 4 + store i32 %o, i32* %o.addr, align 4 + store i32 %p, i32* %p.addr, align 4 + %0 = load i32, i32* %i.addr, align 4 + %1 = load i32, i32* %j.addr, align 4 + %add = add i32 %0, %1 + %2 = load i32, i32* %k.addr, align 4 + %add1 = add i32 %add, %2 + %3 = load i32, i32* %l.addr, align 4 + %add2 = add i32 %add1, %3 + %4 = load i32, i32* 
%m.addr, align 4 + %add3 = add i32 %add2, %4 + %5 = load i32, i32* %n.addr, align 4 + %add4 = add i32 %add3, %5 + %6 = load i32, i32* %o.addr, align 4 + %add5 = add i32 %add4, %6 + %7 = load i32, i32* %p.addr, align 4 + %add6 = add i32 %add5, %7 + store i32 %add6, i32* %result, align 4 +; CHECK: {{.*}}r4{{.*}} +; CHECK: {{.*}}r5{{.*}} + +; CHECK: pop {{{.*}}r4, r5{{.*}}} + ret void +}