Index: lib/Target/ARM/ARMAsmPrinter.cpp =================================================================== --- lib/Target/ARM/ARMAsmPrinter.cpp +++ lib/Target/ARM/ARMAsmPrinter.cpp @@ -1071,7 +1071,6 @@ const TargetRegisterInfo *TargetRegInfo = MF.getSubtarget().getRegisterInfo(); const MachineRegisterInfo &MachineRegInfo = MF.getRegInfo(); - const ARMFunctionInfo &AFI = *MF.getInfo<ARMFunctionInfo>(); unsigned FramePtr = TargetRegInfo->getFrameRegister(MF); unsigned Opc = MI->getOpcode(); @@ -1135,7 +1134,12 @@ Pad += Width; continue; } - RegList.push_back(MO.getReg()); + // Check for registers that are remapped (for a Thumb1 prologue that + // saves high registers). + unsigned Reg = MO.getReg(); + if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(Reg)) + Reg = RemappedReg; + RegList.push_back(Reg); } break; case ARM::STR_PRE_IMM: @@ -1185,7 +1189,7 @@ unsigned CPI = MI->getOperand(1).getIndex(); const MachineConstantPool *MCP = MF.getConstantPool(); if (CPI >= MCP->getConstants().size()) - CPI = AFI.getOriginalCPIdx(CPI); + CPI = AFI->getOriginalCPIdx(CPI); assert(CPI != -1U && "Invalid constpool index"); // Derive the actual offset. @@ -1215,8 +1219,12 @@ } else if (DstReg == ARM::SP) { MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); - } - else { + } else if (Opc == ARM::tMOVr) { + // If a Thumb1 function spills r8-r11, we copy the values to low + // registers before pushing them. Record the copy so we can emit the + // correct ".save" later. 
+ AFI->EHPrologueRemappedRegs[DstReg] = SrcReg; + } else { MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); } Index: lib/Target/ARM/ARMMachineFunctionInfo.h =================================================================== --- lib/Target/ARM/ARMMachineFunctionInfo.h +++ lib/Target/ARM/ARMMachineFunctionInfo.h @@ -245,6 +245,8 @@ void setPromotedConstpoolIncrease(int Sz) { PromotedGlobalsIncrease = Sz; } + + DenseMap<unsigned, unsigned> EHPrologueRemappedRegs; }; } // end namespace llvm Index: lib/Target/ARM/Thumb1FrameLowering.cpp =================================================================== --- lib/Target/ARM/Thumb1FrameLowering.cpp +++ lib/Target/ARM/Thumb1FrameLowering.cpp @@ -889,8 +889,9 @@ findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd); // Create the PUSH, but don't insert it yet (the MOVs need to come first). - MachineInstrBuilder PushMIB = - BuildMI(MF, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); + MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameSetup); SmallVector<unsigned, 4> RegsToPush; while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) { @@ -903,7 +904,8 @@ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) .addReg(*CopyReg, RegState::Define) .addReg(*HiRegToSave, getKillRegState(isKill)) - .add(predOps(ARMCC::AL)); + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameSetup); // Record the register that must be added to the PUSH. RegsToPush.push_back(*CopyReg); Index: test/CodeGen/Thumb/callee_save.ll =================================================================== --- test/CodeGen/Thumb/callee_save.ll +++ test/CodeGen/Thumb/callee_save.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=thumbv6m-none-eabi < %s | FileCheck %s declare i8* @llvm.returnaddress(i32) @@ -6,10 +7,14 @@ ; only need to save the low registers. 
define void @low_regs_only() { ; CHECK-LABEL: low_regs_only: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: -; CHECK: push {r4, r5, r6, r7, lr} tail call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7}"() -; CHECK: pop {r4, r5, r6, r7, pc} ret void } @@ -17,12 +22,17 @@ ; argument/return register to help save/restore it. define void @one_high() { ; CHECK-LABEL: one_high: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: .save {r8} +; CHECK-NEXT: push {r3} +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: pop {r0} +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: bx lr entry: -; CHECK: mov [[SAVEREG:r[0-3]]], r8 -; CHECK: push {[[SAVEREG]]} tail call void asm sideeffect "", "~{r8}"() -; CHECK: pop {[[RESTOREREG:r[0-3]]]} -; CHECK: mov r8, [[RESTOREREG]] ret void } @@ -30,18 +40,23 @@ ; 4 arg/return regs for the save/restore. define void @four_high() { ; CHECK-LABEL: four_high: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: mov r3, r11 +; CHECK-NEXT: mov r2, r10 +; CHECK-NEXT: mov r1, r9 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: .save {r8, r9, r10, r11} +; CHECK-NEXT: push {r0, r1, r2, r3} +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: pop {r0, r1, r2, r3} +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: bx lr entry: -; CHECK: mov r3, r11 -; CHECK: mov r2, r10 -; CHECK: mov r1, r9 -; CHECK: mov r0, r8 -; CHECK: push {r0, r1, r2, r3} tail call void asm sideeffect "", "~{r8},~{r9},~{r10},~{r11}"() -; CHECK: pop {r0, r1, r2, r3} -; CHECK: mov r8, r0 -; CHECK: mov r9, r1 -; CHECK: mov r10, r2 -; CHECK: mov r11, r3 ret void } @@ -50,14 +65,19 @@ ; lr to save/restore r8. 
define void @one_high_one_low() { ; CHECK-LABEL: one_high_one_low: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r7, lr} +; CHECK-NEXT: push {r4, r7, lr} +; CHECK-NEXT: mov lr, r8 +; CHECK-NEXT: .save {r8} +; CHECK-NEXT: push {lr} +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: pop {r0} +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: pop {r4, r7, pc} entry: -; CHECK: push {r4, r7, lr} -; CHECK: mov [[SAVEREG:r0|r1|r2|r3|r4|r7|lr]], r8 -; CHECK: push {[[SAVEREG]]} tail call void asm sideeffect "", "~{r4},~{r8}"() -; CHECK: pop {[[RESTOREREG:r0|r1|r2|r3|r4|r7]]} -; CHECK: mov r8, [[RESTOREREG]] -; CHECK: pop {r4, r7, pc} ret void } @@ -65,20 +85,25 @@ ; first push so can be used for pushing the high registers. define void @four_high_four_low() { ; CHECK-LABEL: four_high_four_low: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: mov lr, r11 +; CHECK-NEXT: mov r7, r10 +; CHECK-NEXT: mov r6, r9 +; CHECK-NEXT: mov r5, r8 +; CHECK-NEXT: .save {r8, r9, r10, r11} +; CHECK-NEXT: push {r5, r6, r7, lr} +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: pop {r0, r1, r2, r3} +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: -; CHECK: push {r4, r5, r6, r7, lr} -; CHECK: mov lr, r11 -; CHECK: mov r7, r10 -; CHECK: mov r6, r9 -; CHECK: mov r5, r8 -; CHECK: push {r5, r6, r7, lr} tail call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"() -; CHECK: pop {r0, r1, r2, r3} -; CHECK: mov r8, r0 -; CHECK: mov r9, r1 -; CHECK: mov r10, r2 -; CHECK: mov r11, r3 -; CHECK: pop {r4, r5, r6, r7, pc} ret void } @@ -87,21 +112,27 @@ ; cannot be used while saving/restoring the high regs. 
define void @four_high_four_low_frame_ptr() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: four_high_four_low_frame_ptr: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .setfp r7, sp, #12 +; CHECK-NEXT: add r7, sp, #12 +; CHECK-NEXT: mov lr, r11 +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: mov r5, r9 +; CHECK-NEXT: mov r4, r8 +; CHECK-NEXT: .save {r8, r9, r10, r11} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: pop {r0, r1, r2, r3} +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: -; CHECK: push {r4, r5, r6, r7, lr} -; CHECK: add r7, sp, #12 -; CHECK: mov lr, r11 -; CHECK: mov r6, r10 -; CHECK: mov r5, r9 -; CHECK: mov r4, r8 -; CHECK: push {r4, r5, r6, lr} tail call void asm sideeffect "", "~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"() -; CHECK: pop {r0, r1, r2, r3} -; CHECK: mov r8, r0 -; CHECK: mov r9, r1 -; CHECK: mov r10, r2 -; CHECK: mov r11, r3 -; CHECK: pop {r4, r5, r6, r7, pc} ret void } @@ -110,21 +141,29 @@ ; the high regs. 
define void @four_high_four_low_frame_ptr_ret_addr() "no-frame-pointer-elim"="true" { ; CHECK-LABEL: four_high_four_low_frame_ptr_ret_addr: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .setfp r7, sp, #12 +; CHECK-NEXT: add r7, sp, #12 +; CHECK-NEXT: mov r6, r11 +; CHECK-NEXT: mov r5, r10 +; CHECK-NEXT: mov r4, r9 +; CHECK-NEXT: mov r3, r8 +; CHECK-NEXT: .save {r8, r9, r10, r11} +; CHECK-NEXT: push {r3, r4, r5, r6} +; CHECK-NEXT: mov r0, lr +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: pop {r0, r1, r2, r3} +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: -; CHECK: push {r4, r5, r6, r7, lr} -; CHECK: mov r6, r11 -; CHECK: mov r5, r10 -; CHECK: mov r4, r9 -; CHECK: mov r3, r8 -; CHECK: push {r3, r4, r5, r6} %a = tail call i8* @llvm.returnaddress(i32 0) tail call void asm sideeffect "", "r,~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"(i8* %a) -; CHECK: pop {r0, r1, r2, r3} -; CHECK: mov r8, r0 -; CHECK: mov r9, r1 -; CHECK: mov r10, r2 -; CHECK: mov r11, r3 -; CHECK: pop {r4, r5, r6, r7, pc} ret void } @@ -132,20 +171,25 @@ ; registers, so that we can use them for saving the high regs. 
define void @four_high_four_arg(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-LABEL: four_high_four_arg: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r5, r6, r7, lr} +; CHECK-NEXT: push {r5, r6, r7, lr} +; CHECK-NEXT: mov lr, r11 +; CHECK-NEXT: mov r7, r10 +; CHECK-NEXT: mov r6, r9 +; CHECK-NEXT: mov r5, r8 +; CHECK-NEXT: .save {r8, r9, r10, r11} +; CHECK-NEXT: push {r5, r6, r7, lr} +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: pop {r0, r1, r2, r3} +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r9, r1 +; CHECK-NEXT: mov r10, r2 +; CHECK-NEXT: mov r11, r3 +; CHECK-NEXT: pop {r5, r6, r7, pc} entry: -; CHECK: push {r5, r6, r7, lr} -; CHECK: mov lr, r11 -; CHECK: mov r7, r10 -; CHECK: mov r6, r9 -; CHECK: mov r5, r8 -; CHECK: push {r5, r6, r7, lr} tail call void asm sideeffect "", "r,r,r,r,~{r8},~{r9},~{r10},~{r11}"(i32 %a, i32 %b, i32 %c, i32 %d) -; CHECK: pop {r0, r1, r2, r3} -; CHECK: mov r8, r0 -; CHECK: mov r9, r1 -; CHECK: mov r10, r2 -; CHECK: mov r11, r3 -; CHECK: pop {r5, r6, r7, pc} ret void } @@ -153,24 +197,33 @@ ; registers, so that we can use them for restoring the high regs. 
define <4 x i32> @four_high_four_return() { ; CHECK-LABEL: four_high_four_return: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: mov lr, r11 +; CHECK-NEXT: mov r7, r10 +; CHECK-NEXT: mov r6, r9 +; CHECK-NEXT: mov r5, r8 +; CHECK-NEXT: .save {r8, r9, r10, r11} +; CHECK-NEXT: push {r5, r6, r7, lr} +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: movs r1, #2 +; CHECK-NEXT: movs r2, #3 +; CHECK-NEXT: movs r3, #4 +; CHECK-NEXT: pop {r4, r5, r6, r7} +; CHECK-NEXT: mov r8, r4 +; CHECK-NEXT: mov r9, r5 +; CHECK-NEXT: mov r10, r6 +; CHECK-NEXT: mov r11, r7 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: -; CHECK: push {r4, r5, r6, r7, lr} -; CHECK: mov lr, r11 -; CHECK: mov r7, r10 -; CHECK: mov r6, r9 -; CHECK: mov r5, r8 -; CHECK: push {r5, r6, r7, lr} tail call void asm sideeffect "", "~{r8},~{r9},~{r10},~{r11}"() %vecinit = insertelement <4 x i32> undef, i32 1, i32 0 %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 -; CHECK: pop {r4, r5, r6, r7} -; CHECK: mov r8, r4 -; CHECK: mov r9, r5 -; CHECK: mov r10, r6 -; CHECK: mov r11, r7 -; CHECK: pop {r4, r5, r6, r7, pc} ret <4 x i32> %vecinit13 } @@ -179,16 +232,39 @@ ; r5, r6), with which to save 4 high registers, so we have to use two pushes ; and pops. 
define <4 x i32> @all_of_the_above(i32 %a, i32 %b, i32 %c, i32 %d) "no-frame-pointer-elim"="true" { -; CHECK-LABEL: all_of_the_above +; CHECK-LABEL: all_of_the_above: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .setfp r7, sp, #12 +; CHECK-NEXT: add r7, sp, #12 +; CHECK-NEXT: mov r6, r11 +; CHECK-NEXT: mov r5, r10 +; CHECK-NEXT: mov r4, r9 +; CHECK-NEXT: .save {r9, r10, r11} +; CHECK-NEXT: push {r4, r5, r6} +; CHECK-NEXT: mov r6, r8 +; CHECK-NEXT: .save {r8} +; CHECK-NEXT: push {r6} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: mov r4, lr +; CHECK-NEXT: str r4, [sp] @ 4-byte Spill +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: movs r1, #2 +; CHECK-NEXT: movs r2, #3 +; CHECK-NEXT: movs r3, #4 +; CHECK-NEXT: ldr r0, [sp] @ 4-byte Reload +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop {r4, r5, r6} +; CHECK-NEXT: mov r8, r4 +; CHECK-NEXT: mov r9, r5 +; CHECK-NEXT: mov r10, r6 +; CHECK-NEXT: pop {r4} +; CHECK-NEXT: mov r11, r4 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: -; CHECK: push {r4, r5, r6, r7, lr} -; CHECK: add r7, sp, #12 -; CHECK: mov r6, r11 -; CHECK: mov r5, r10 -; CHECK: mov r4, r9 -; CHECK: push {r4, r5, r6} -; CHECK: mov r6, r8 -; CHECK: push {r6} tail call void asm sideeffect "", "r,r,r,r,~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11}"(i32 %a, i32 %b, i32 %c, i32 %d) %e = tail call i8* @llvm.returnaddress(i32 0) %f = ptrtoint i8* %e to i32 @@ -196,13 +272,6 @@ %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 -; CHECK: pop {r4, r5, r6} -; CHECK: mov r8, r4 -; CHECK: mov r9, r5 -; CHECK: mov r10, r6 -; CHECK: pop {r4} -; CHECK: mov r11, r4 -; CHECK: pop {r4, r5, r6, r7, pc} ret <4 x i32> %vecinit13 } @@ -212,25 +281,42 @@ ; used when restoring sp from fp, as that happens before the first pop. 
define <4 x i32> @base_pointer(i32 %a) { ; CHECK-LABEL: base_pointer: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r6, r7, lr} +; CHECK-NEXT: push {r4, r6, r7, lr} +; CHECK-NEXT: .setfp r7, sp, #8 +; CHECK-NEXT: add r7, sp, #8 +; CHECK-NEXT: mov lr, r9 +; CHECK-NEXT: mov r6, r8 +; CHECK-NEXT: .save {r8, r9} +; CHECK-NEXT: push {r6, lr} +; CHECK-NEXT: mov r6, sp +; CHECK-NEXT: lsls r0, r0, #2 +; CHECK-NEXT: adds r0, r0, #7 +; CHECK-NEXT: movs r1, #7 +; CHECK-NEXT: bics r0, r1 +; CHECK-NEXT: mov r1, sp +; CHECK-NEXT: subs r0, r1, r0 +; CHECK-NEXT: mov sp, r0 +; CHECK-NEXT: @APP +; CHECK-NEXT: @NO_APP +; CHECK-NEXT: movs r0, #1 +; CHECK-NEXT: movs r1, #2 +; CHECK-NEXT: movs r2, #3 +; CHECK-NEXT: movs r3, #4 +; CHECK-NEXT: subs r4, r7, #7 +; CHECK-NEXT: subs r4, #9 +; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: pop {r4, r6} +; CHECK-NEXT: mov r8, r4 +; CHECK-NEXT: mov r9, r6 +; CHECK-NEXT: pop {r4, r6, r7, pc} entry: -; CHECK: push {r4, r6, r7, lr} -; CHECK: add r7, sp, #8 -; CHECK: mov lr, r9 -; CHECK: mov r6, r8 -; CHECK: push {r6, lr} -; CHECK: mov r6, sp %b = alloca i32, i32 %a call void asm sideeffect "", "r,~{r8},~{r9}"(i32* %b) %vecinit = insertelement <4 x i32> undef, i32 1, i32 0 %vecinit11 = insertelement <4 x i32> %vecinit, i32 2, i32 1 %vecinit12 = insertelement <4 x i32> %vecinit11, i32 3, i32 2 %vecinit13 = insertelement <4 x i32> %vecinit12, i32 4, i32 3 -; CHECK: subs r4, r7, #7 -; CHECK: subs r4, #9 -; CHECK: mov sp, r4 -; CHECK: pop {r4, r6} -; CHECK: mov r8, r4 -; CHECK: mov r9, r6 -; CHECK: pop {r4, r6, r7, pc} ret <4 x i32> %vecinit13 }