Index: lib/Target/ARM/ARMInstrThumb.td
===================================================================
--- lib/Target/ARM/ARMInstrThumb.td
+++ lib/Target/ARM/ARMInstrThumb.td
@@ -773,6 +773,16 @@
                                 IIC_iStore_bh_r, IIC_iStore_bh_i, "strh",
                                 truncstorei16>;
 
+// Pseudo instructions for Thumb1 high-register spills.
+let mayStore = 1 in
+def tSPILL_HREG_SAVE :
+  tPseudoInst<(outs), (ins hGPR:$Rt, t_addrmode_sp:$addr), 0, IIC_iStore_i, []>,
+  Requires<[IsThumb1Only]>, Sched<[WriteST]>;
+let mayLoad = 1 in
+def tSPILL_HREG_RESTORE :
+  tPseudoInst<(outs hGPR:$Rt), (ins t_addrmode_sp:$addr), 0, IIC_iLoad_i, []>,
+  Requires<[IsThumb1Only]>, Sched<[WriteLd]>;
+
 
 //===----------------------------------------------------------------------===//
 // Load / store multiple Instructions.
Index: lib/Target/ARM/ARMRegisterInfo.td
===================================================================
--- lib/Target/ARM/ARMRegisterInfo.td
+++ lib/Target/ARM/ARMRegisterInfo.td
@@ -205,10 +205,8 @@
 def GPR : RegisterClass<"ARM", [i32], 32, (add (sequence "R%u", 0, 12),
                                                SP, LR, PC)> {
   // Allocate LR as the first CSR since it is always saved anyway.
-  // For Thumb1 mode, we don't want to allocate hi regs at all, as we don't
-  // know how to spill them. If we make our prologue/epilogue code smarter at
-  // some point, we can go back to using the above allocation orders for the
-  // Thumb1 instructions that know how to use hi regs.
+  // For Thumb1 mode, we don't allocate hi regs as working with them (e.g.
+  // spilling) can be costly.
   let AltOrders = [(add LR, GPR), (trunc GPR, 8)];
   let AltOrderSelect = [{
       return 1 + MF.getSubtarget<ARMSubtarget>().isThumb1Only();
Index: lib/Target/ARM/Thumb1InstrInfo.cpp
===================================================================
--- lib/Target/ARM/Thumb1InstrInfo.cpp
+++ lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -80,28 +80,37 @@
                     unsigned SrcReg, bool isKill, int FI,
                     const TargetRegisterClass *RC,
                     const TargetRegisterInfo *TRI) const {
-  assert((RC == &ARM::tGPRRegClass ||
-          (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
-           isARMLowRegister(SrcReg))) && "Unknown regclass!");
+  DebugLoc DL;
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
 
-  if (RC == &ARM::tGPRRegClass ||
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
+      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+
+  if (RC->hasSuperClassEq(&ARM::tGPRRegClass) ||
       (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
-       isARMLowRegister(SrcReg))) {
-    DebugLoc DL;
-    if (I != MBB.end()) DL = I->getDebugLoc();
-
-    MachineFunction &MF = *MBB.getParent();
-    MachineFrameInfo &MFI = MF.getFrameInfo();
-    MachineMemOperand *MMO = MF.getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
-        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+       isARMLowRegister(SrcReg)))
     BuildMI(MBB, I, DL, get(ARM::tSTRspi))
         .addReg(SrcReg, getKillRegState(isKill))
         .addFrameIndex(FI)
         .addImm(0)
         .addMemOperand(MMO)
         .add(predOps(ARMCC::AL));
-  }
+  else if (RC->hasSuperClassEq(&ARM::hGPRRegClass))
+    // Callers of storeRegToStackSlot() may expect only a single instruction to
+    // be added but Thumb1 does not have an instruction that directly stores a
+    // high register. Insert therefore a pseudo instruction that gets lowered
+    // after register allocation in eliminateFrameIndex().
+    BuildMI(MBB, I, DL, get(ARM::tSPILL_HREG_SAVE))
+        .addReg(SrcReg, getKillRegState(isKill))
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMO);
+  else
+    llvm_unreachable("Unknown reg class!");
 }
 
 void Thumb1InstrInfo::
@@ -109,27 +118,30 @@
                      unsigned DestReg, int FI,
                      const TargetRegisterClass *RC,
                      const TargetRegisterInfo *TRI) const {
-  assert((RC->hasSuperClassEq(&ARM::tGPRRegClass) ||
-          (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
-           isARMLowRegister(DestReg))) && "Unknown regclass!");
+  DebugLoc DL;
+  if (I != MBB.end())
+    DL = I->getDebugLoc();
 
+  MachineFunction &MF = *MBB.getParent();
+  MachineFrameInfo &MFI = MF.getFrameInfo();
+  MachineMemOperand *MMO = MF.getMachineMemOperand(
+      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
+      MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
   if (RC->hasSuperClassEq(&ARM::tGPRRegClass) ||
       (TargetRegisterInfo::isPhysicalRegister(DestReg) &&
-       isARMLowRegister(DestReg))) {
-    DebugLoc DL;
-    if (I != MBB.end()) DL = I->getDebugLoc();
-
-    MachineFunction &MF = *MBB.getParent();
-    MachineFrameInfo &MFI = MF.getFrameInfo();
-    MachineMemOperand *MMO = MF.getMachineMemOperand(
-        MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
-        MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
+       isARMLowRegister(DestReg)))
     BuildMI(MBB, I, DL, get(ARM::tLDRspi), DestReg)
         .addFrameIndex(FI)
         .addImm(0)
         .addMemOperand(MMO)
         .add(predOps(ARMCC::AL));
-  }
+  else if (RC->hasSuperClassEq(&ARM::hGPRRegClass))
+    BuildMI(MBB, I, DL, get(ARM::tSPILL_HREG_RESTORE), DestReg)
+        .addFrameIndex(FI)
+        .addImm(0)
+        .addMemOperand(MMO);
+  else
+    llvm_unreachable("Unknown reg class!");
 }
 
 void Thumb1InstrInfo::expandLoadStackGuard(
Index: lib/Target/ARM/ThumbRegisterInfo.h
===================================================================
--- lib/Target/ARM/ThumbRegisterInfo.h
+++ lib/Target/ARM/ThumbRegisterInfo.h
@@ -60,6 +60,17 @@
   void eliminateFrameIndex(MachineBasicBlock::iterator II,
                            int SPAdj, unsigned FIOperandNum,
                            RegScavenger *RS = nullptr) const override;
+
+private:
+  /// Eliminate the frame index operand of a regular Thumb1 instruction.
+  void eliminateThumb1FrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+                                 unsigned FIOperandNum, RegScavenger *RS) const;
+  /// Expand a tSPILL_HREG_SAVE/tSPILL_HREG_RESTORE pseudo instruction into a
+  /// low-register store/load plus a MOV, then eliminate its frame index.
+  void eliminateThumb1FrameIndexFromHighRegSpill(MachineBasicBlock::iterator II,
+                                                 int SPAdj,
+                                                 unsigned FIOperandNum,
+                                                 RegScavenger *RS) const;
 };
 }
 
Index: lib/Target/ARM/ThumbRegisterInfo.cpp
===================================================================
--- lib/Target/ARM/ThumbRegisterInfo.cpp
+++ lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -515,6 +515,26 @@
     return ARMBaseRegisterInfo::eliminateFrameIndex(II, SPAdj, FIOperandNum,
                                                     RS);
 
+  // Eliminate frame index from Thumb1 high-register spills.
+  if (MI.getOpcode() == ARM::tSPILL_HREG_SAVE ||
+      MI.getOpcode() == ARM::tSPILL_HREG_RESTORE) {
+    eliminateThumb1FrameIndexFromHighRegSpill(II, SPAdj, FIOperandNum, RS);
+    return;
+  }
+
+  eliminateThumb1FrameIndex(II, SPAdj, FIOperandNum, RS);
+}
+
+void ThumbRegisterInfo::eliminateThumb1FrameIndex(
+    MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
+    RegScavenger *RS) const {
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+  assert(STI.isThumb1Only() &&
+         "This eliminateFrameIndex only supports Thumb1!");
+
   unsigned VReg = 0;
   const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
   DebugLoc dl = MI.getDebugLoc();
@@ -548,8 +568,6 @@
   }
 
   // Modify MI as necessary to handle as much of 'Offset' as possible
-  assert(MF.getInfo<ARMFunctionInfo>()->isThumbFunction() &&
-         "This eliminateFrameIndex only supports Thumb1!");
   if (rewriteFrameIndex(MI, FIOperandNum, FrameReg, Offset, TII))
     return;
 
@@ -619,3 +637,53 @@
   if (MI.isPredicable())
     MIB.add(predOps(ARMCC::AL));
 }
+
+void ThumbRegisterInfo::eliminateThumb1FrameIndexFromHighRegSpill(
+    MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum,
+    RegScavenger *RS) const {
+  MachineInstr &MI = *II;
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineFunction &MF = *MBB.getParent();
+  const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+  assert(STI.isThumb1Only() &&
+         "This eliminateFrameIndex only supports Thumb1!");
+  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  // Elimination of a frame index from Thumb1 high-register spills is done in
+  // two steps. The pseudo instructions get first expanded into low-register
+  // stores/loads and then the frame index is eliminated from these new
+  // instructions.
+  unsigned LowReg = MRI.createVirtualRegister(&ARM::tGPRRegClass);
+  unsigned HiReg = MI.getOperand(0).getReg();
+  MachineInstr *UpdateMI = nullptr;
+  unsigned Opcode = MI.getOpcode();
+  if (Opcode == ARM::tSPILL_HREG_SAVE) {
+    // Emit a MOV from the high reg to the low reg. Keep the kill state of the
+    // pseudo's source operand since the spilled register may remain live.
+    BuildMI(MBB, II, MI.getDebugLoc(), TII.get(ARM::tMOVr), LowReg)
+        .addReg(HiReg, getKillRegState(MI.getOperand(0).isKill()))
+        .add(predOps(ARMCC::AL));
+    // Store the low register.
+    UpdateMI = BuildMI(MBB, II, MI.getDebugLoc(), TII.get(ARM::tSTRspi))
+                   .addReg(LowReg, RegState::Kill)
+                   .add(MI.getOperand(1))
+                   .add(MI.getOperand(2))
+                   .setMemRefs(MI.memoperands_begin(), MI.memoperands_end())
+                   .add(predOps(ARMCC::AL));
+  } else if (Opcode == ARM::tSPILL_HREG_RESTORE) {
+    // Load the saved value in the low register.
+    UpdateMI = BuildMI(MBB, II, MI.getDebugLoc(), TII.get(ARM::tLDRspi), LowReg)
+                   .add(MI.getOperand(1))
+                   .add(MI.getOperand(2))
+                   .setMemRefs(MI.memoperands_begin(), MI.memoperands_end())
+                   .add(predOps(ARMCC::AL));
+    // Emit a MOV from the low reg to the high reg.
+    BuildMI(MBB, II, MI.getDebugLoc(), TII.get(ARM::tMOVr), HiReg)
+        .addReg(LowReg, RegState::Kill)
+        .add(predOps(ARMCC::AL));
+  } else
+    llvm_unreachable("Unexpected opcode!");
+  MBB.erase(II);
+  eliminateThumb1FrameIndex(UpdateMI->getIterator(), SPAdj, FIOperandNum, RS);
+}
Index: test/CodeGen/Thumb/high-reg-spill-expand.mir
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb/high-reg-spill-expand.mir
@@ -0,0 +1,64 @@
+# RUN: llc -run-pass prologepilog %s -o - | FileCheck %s
+
+# Check that the tSPILL_HREG_SAVE/RESTORE pseudo instructions get properly
+# expanded and have their frame index eliminated when the Prologue/Epilogue
+# Insertion pass is run.
+
+--- |
+  ; ModuleID = 'test.ll'
+  source_filename = "test.c"
+  target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv6m-none--eabi"
+
+  define void @constraint_h() #0 {
+  entry:
+    %i = alloca i32, align 4
+    %0 = load i32, i32* %i, align 4
+    call void asm sideeffect "@ $0", "h,~{r12}"(i32 %0)
+    ret void
+  }
+
+  attributes #0 = { "no-frame-pointer-elim"="true" }
+
+...
+---
+name: constraint_h
+tracksRegLiveness: true
+stack:
+  - { id: 0, name: i, size: 4, alignment: 4, stack-id: 0, local-offset: -4 }
+  - { id: 1, type: spill-slot, size: 4, alignment: 4, stack-id: 0 }
+body: |
+  bb.0.entry:
+    renamable $r0 = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i)
+    renamable $r12 = COPY killed renamable $r0
+    tSPILL_HREG_SAVE killed $r12, %stack.1, 0 :: (store 4 into %stack.1)
+    $r8 = tSPILL_HREG_RESTORE %stack.1, 0 :: (load 4 from %stack.1)
+    INLINEASM &"@ $0", 1, 589833, killed renamable $r8, 12, implicit-def early-clobber $r12
+    tBX_RET 14, $noreg
+
+...
+# CHECK: bb.0.entry:
+# CHECK-NEXT: liveins: $r6, $lr, $r8
+# CHECK-NEXT: {{ }}
+# CHECK-NEXT: frame-setup tPUSH 14, $noreg, killed $r6, killed $r7, killed $lr, implicit-def $sp, implicit $sp
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 12
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r6, -12
+# CHECK-NEXT: $r7 = frame-setup tADDrSPi $sp, 1, 14, $noreg
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $r7, 8
+# CHECK-NEXT: $lr = tMOVr killed $r8, 14, $noreg
+# CHECK-NEXT: tPUSH 14, $noreg, killed $lr, implicit-def $sp, implicit $sp
+# CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $r8, -16
+# CHECK-NEXT: $sp = frame-setup tSUBspi $sp, 2, 14, $noreg
+# CHECK-NEXT: renamable $r0 = tLDRspi $sp, 1, 14, $noreg :: (dereferenceable load 4 from %ir.i)
+# CHECK-NEXT: renamable $r12 = COPY killed renamable $r0
+# CHECK-NEXT: $r0 = tMOVr killed $r12, 14, $noreg
+# CHECK-NEXT: tSTRspi killed $r0, $sp, 0, 14, $noreg :: (store 4 into %stack.1)
+# CHECK-NEXT: $r0 = tLDRspi $sp, 0, 14, $noreg :: (load 4 from %stack.1)
+# CHECK-NEXT: $r8 = tMOVr killed $r0, 14, $noreg
+# CHECK-NEXT: INLINEASM &"@ $0", 1, 589833, killed renamable $r8, 12, implicit-def early-clobber $r12
+# CHECK-NEXT: $sp = tADDspi $sp, 2, 14, $noreg
+# CHECK-NEXT: tPOP 14, $noreg, def $r0, implicit-def $sp, implicit $sp
+# CHECK-NEXT: $r8 = tMOVr killed $r0, 14, $noreg
+# CHECK-NEXT: tPOP_RET 14, $noreg, def $r6, def $r7, def $pc, implicit-def $sp, implicit $sp
Index: test/CodeGen/Thumb/high-reg-spill.mir
===================================================================
--- /dev/null
+++ test/CodeGen/Thumb/high-reg-spill.mir
@@ -0,0 +1,50 @@
+# RUN: llc -run-pass regallocfast %s -o - | FileCheck %s
+
+# This test examines register allocation and spilling with Fast Register
+# Allocator. The test uses inline assembler that requests an input variable to
+# be loaded in a high register but at the same time has r12 marked as clobbered.
+# The allocator initially satisfies the load request by selecting r12 but then
+# needs to spill this register when it reaches the INLINEASM instruction and
+# notices its clobber definition.
+#
+# The test checks that the compiler is able to spill a high register in Thumb1
+# by inserting the tSPILL_HREG_SAVE/RESTORE pseudo instructions.
+
+--- |
+  ; ModuleID = 'test.ll'
+  source_filename = "test.c"
+  target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "thumbv6m-none--eabi"
+
+  define dso_local void @constraint_h() {
+  entry:
+    %i = alloca i32, align 4
+    %0 = load i32, i32* %i, align 4
+    call void asm sideeffect "@ $0", "h,~{r12}"(i32 %0)
+    ret void
+  }
+
+...
+---
+name: constraint_h
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: hgpr }
+  - { id: 1, class: tgpr }
+stack:
+  - { id: 0, name: i, size: 4, alignment: 4, stack-id: 0, local-offset: -4 }
+body: |
+  bb.0.entry:
+    %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i)
+    %0:hgpr = COPY %1
+    INLINEASM &"@ $0", 1, 589833, %0, 12, implicit-def early-clobber $r12
+    tBX_RET 14, $noreg
+
+...
+# CHECK: bb.0.entry:
+# CHECK-NEXT: renamable $r0 = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i)
+# CHECK-NEXT: renamable $r12 = COPY killed renamable $r0
+# CHECK-NEXT: tSPILL_HREG_SAVE killed $r12, %stack.1, 0 :: (store 4 into %stack.1)
+# CHECK-NEXT: $r8 = tSPILL_HREG_RESTORE %stack.1, 0 :: (load 4 from %stack.1)
+# CHECK-NEXT: INLINEASM &"@ $0", 1, 589833, killed renamable $r8, 12, implicit-def early-clobber $r12
+# CHECK-NEXT: tBX_RET 14, $noreg