diff --git a/llvm/include/llvm/Support/ARMEHABI.h b/llvm/include/llvm/Support/ARMEHABI.h --- a/llvm/include/llvm/Support/ARMEHABI.h +++ b/llvm/include/llvm/Support/ARMEHABI.h @@ -71,6 +71,10 @@ // Purpose: finish UNWIND_OPCODE_FINISH = 0xb0, + // Format: 10110100 + // Purpose: Pop Return Address Authentication Code + UNWIND_OPCODE_POP_RA_AUTH_CODE = 0xb4, + // Format: 10110001 0000xxxx // Purpose: pop r[3:0] // Constraint: x != 0 diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1153,8 +1153,12 @@ unsigned StartOp = 2 + 2; // Use all the operands. unsigned NumOffset = 0; - // Amount of SP adjustment folded into a push. - unsigned Pad = 0; + // Amount of SP adjustment folded into a push, before the + // registers are stored (pad at higher addresses). + unsigned PadBefore = 0; + // Amount of SP adjustment folded into a push, after the + // registers are stored (pad at lower addresses). + unsigned PadAfter = 0; switch (Opc) { default: @@ -1185,7 +1189,7 @@ "Pad registers must come before restored ones"); unsigned Width = TargetRegInfo->getRegSizeInBits(MO.getReg(), MachineRegInfo) / 8; - Pad += Width; + PadAfter += Width; continue; } // Check for registers that are remapped (for a Thumb1 prologue that @@ -1201,14 +1205,32 @@ case ARM::t2STR_PRE: assert(MI->getOperand(2).getReg() == ARM::SP && "Only stack pointer as a source reg is supported"); + if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(SrcReg)) + SrcReg = RemappedReg; + + RegList.push_back(SrcReg); + break; + case ARM::t2STRD_PRE: + assert(MI->getOperand(3).getReg() == ARM::SP && + "Only stack pointer as a source reg is supported"); + SrcReg = MI->getOperand(1).getReg(); + if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(SrcReg)) + SrcReg = RemappedReg; + RegList.push_back(SrcReg); + SrcReg = MI->getOperand(2).getReg(); + if (unsigned RemappedReg = AFI->EHPrologueRemappedRegs.lookup(SrcReg)) + SrcReg = RemappedReg; RegList.push_back(SrcReg); + PadBefore = -MI->getOperand(4).getImm() - 8; break; } if (MAI->getExceptionHandlingType() == ExceptionHandling::ARM) { + if (PadBefore) + ATS.emitPad(PadBefore); ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); // Account for the SP adjustment, folded into the push. - if (Pad) - ATS.emitPad(Pad); + if (PadAfter) + ATS.emitPad(PadAfter); } } else { // Changes of stack / frame pointer. @@ -1300,6 +1322,10 @@ Offset = MI->getOperand(2).getImm(); AFI->EHPrologueOffsetInRegs[DstReg] |= (Offset << 16); break; + case ARM::t2PAC: + case ARM::t2PACBTI: + AFI->EHPrologueRemappedRegs[ARM::R12] = ARM::RA_AUTH_CODE; + break; default: MI->print(errs()); llvm_unreachable("Unsupported opcode for unwinding information"); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -377,20 +377,20 @@ /// constructing an outlined call if one exists. Returns 0 otherwise. unsigned findRegisterToSaveLRTo(const outliner::Candidate &C) const; - // Adds an instruction which saves the link register on top of the stack into - /// the MachineBasicBlock \p MBB at position \p It. - void saveLROnStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; + /// Adds an instruction which saves the link register on top of the stack into + /// the MachineBasicBlock \p MBB at position \p It.
If \p Auth is true, + /// compute and store an authentication code alongside the link register. + /// If \p CFI is true, emit CFI instructions. + void saveLROnStack(MachineBasicBlock &MBB, MachineBasicBlock::iterator It, + bool CFI, bool Auth) const; /// Adds an instruction which restores the link register from the top the - /// stack into the MachineBasicBlock \p MBB at position \p It. + /// stack into the MachineBasicBlock \p MBB at position \p It. If \p Auth is + /// true, restore an authentication code and authenticate LR. + /// If \p CFI is true, emit CFI instructions. void restoreLRFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; - - /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, - /// for the case when the LR is saved on the stack. - void emitCFIForLRSaveOnStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; + MachineBasicBlock::iterator It, bool CFI, + bool Auth) const; /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, /// for the case when the LR is saved in the register \p Reg. @@ -398,11 +398,6 @@ MachineBasicBlock::iterator It, Register Reg) const; - /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, - /// after the LR is was restored from the stack. - void emitCFIForLRRestoreFromStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const; - /// Emit CFI instructions into the MachineBasicBlock \p MBB at position \p It, /// after the LR is was restored from a register. void emitCFIForLRRestoreFromReg(MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -5678,7 +5678,7 @@ /// | | Thumb2 | ARM | /// +-------------------------+--------+-----+ /// | Call overhead in Bytes | 4 | 4 | -/// | Frame overhead in Bytes | 4 | 4 | +/// | Frame overhead in Bytes | 2 | 4 | /// | Stack fixup required | No | No | /// +-------------------------+--------+-----+ /// @@ -5755,7 +5755,7 @@ CallThunk(target.isThumb() ? 4 : 4), FrameThunk(target.isThumb() ? 0 : 0), CallNoLRSave(target.isThumb() ? 4 : 4), - FrameNoLRSave(target.isThumb() ? 4 : 4), + FrameNoLRSave(target.isThumb() ? 2 : 4), CallRegSave(target.isThumb() ? 8 : 12), FrameRegSave(target.isThumb() ? 2 : 4), CallDefault(target.isThumb() ? 8 : 12), @@ -5868,11 +5868,17 @@ return outliner::OutlinedFunction(); } + // We expect the majority of the outlining candidates to be in consensus with + // regard to return address sign and authentication, and branch target + // enforcement; in other words, partitioning according to all four + // possible combinations of PAC-RET and BTI is going to yield one big subset + // and three small (likely empty) subsets. That allows us to cull incompatible + // candidates separately for PAC-RET and BTI. + // Partition the candidates in two sets: one with BTI enabled and one with BTI - // disabled. Remove the candidates from the smaller set. We expect the - // majority of the candidates to be in consensus with regard to branch target - // enforcement with just a few oddballs, but if they are the same number - // prefer the non-BTI ones for outlining, since they have less overhead. + // disabled. Remove the candidates from the smaller set. If they are the same + // number, prefer the non-BTI ones for outlining, since they have less + // overhead.
auto NoBTI = llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) { const ARMFunctionInfo &AFI = *C.getMF()->getInfo(); @@ -5883,6 +5889,24 @@ RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end()); else RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI); + + if (RepeatedSequenceLocs.size() < 2) + return outliner::OutlinedFunction(); + + // Likewise, partition the candidates according to PAC-RET enablement. + auto NoPAC = + llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) { + const ARMFunctionInfo &AFI = *C.getMF()->getInfo(); + // If the function happens to not spill the LR, do not disqualify it + // from the outlining. + return AFI.shouldSignReturnAddress(true); + }); + if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) > + std::distance(NoPAC, RepeatedSequenceLocs.end())) + RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end()); + else + RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC); + if (RepeatedSequenceLocs.size() < 2) return outliner::OutlinedFunction(); @@ -5899,6 +5923,7 @@ }; OutlinerCosts Costs(Subtarget); + const auto &SomeMFI = *RepeatedSequenceLocs.front().getMF()->getInfo(); // Adjust costs to account for the BTI instructions. @@ -5909,6 +5934,13 @@ Costs.FrameTailCall += 4; Costs.FrameThunk += 4; } + + // Adjust costs to account for sign and authentication instructions. + if (SomeMFI.shouldSignReturnAddress(true)) { + Costs.CallDefault += 8; // +PAC instr, +AUT instr + Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr + } + unsigned FrameID = MachineOutlinerDefault; unsigned NumBytesToCreateFrame = Costs.FrameDefault; @@ -6325,6 +6357,11 @@ // * LR is available in the range (No save/restore around call) // * The range doesn't include calls (No save/restore in outlined frame) // are true. + // These conditions also ensure correctness of the return address + // authentication - we insert sign and authentication instructions only if + // we save/restore LR on stack, but then this condition ensures that the + // outlined range does not modify the SP, therefore the SP value used for + // signing is the same as the one used for authentication. // FIXME: This is very restrictive; the flags check the whole block, // not just the bit we will try to outline. bool MightNeedStackFixUp = @@ -6369,23 +6406,39 @@ } void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB, - MachineBasicBlock::iterator It) const { - unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM; - int Align = -Subtarget.getStackAlignment().value(); - BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP) - .addReg(ARM::LR, RegState::Kill) - .addReg(ARM::SP) - .addImm(Align) - .add(predOps(ARMCC::AL)); -} + MachineBasicBlock::iterator It, bool CFI, + bool Auth) const { + int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8)); + assert(Align >= 8 && Align <= 256); + if (Auth) { + assert(Subtarget.isThumb2()); + // Compute PAC in R12. Outlining ensures R12 is dead across the outlined + // sequence. + BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)) + .setMIFlags(MachineInstr::FrameSetup); + BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP) + .addReg(ARM::R12, RegState::Kill) + .addReg(ARM::LR, RegState::Kill) + .addReg(ARM::SP) + .addImm(-Align) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameSetup); + } else { + unsigned Opc = Subtarget.isThumb() ? 
ARM::t2STR_PRE : ARM::STR_PRE_IMM; + BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP) + .addReg(ARM::LR, RegState::Kill) + .addReg(ARM::SP) + .addImm(-Align) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameSetup); + } + + if (!CFI) + return; -void ARMBaseInstrInfo::emitCFIForLRSaveOnStack( - MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { MachineFunction &MF = *MBB.getParent(); - const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); - unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); - int Align = Subtarget.getStackAlignment().value(); - // Add a CFI saying the stack was moved down. + + // Add a CFI, saying CFA is offset by Align bytes from SP. int64_t StackPosEntry = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align)); BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) .addCFIIndex(StackPosEntry) .setMIFlags(MachineInstr::FrameSetup); @@ -6394,11 +6447,23 @@ // Add a CFI saying that the LR that we want to find is now higher than // before. - int64_t LRPosEntry = - MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfLR, -Align)); + int LROffset = Auth ? Align - 4 : Align; + const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); + unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); + int64_t LRPosEntry = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DwarfLR, -LROffset)); BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) .addCFIIndex(LRPosEntry) .setMIFlags(MachineInstr::FrameSetup); + if (Auth) { + // Add a CFI for the location of the return address PAC. + unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true); + int64_t RACPosEntry = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DwarfRAC, -Align)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(RACPosEntry) + .setMIFlags(MachineInstr::FrameSetup); + } } void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB, @@ -6416,35 +6481,64 @@ .setMIFlags(MachineInstr::FrameSetup); } -void ARMBaseInstrInfo::restoreLRFromStack( - MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { - unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; - MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR) - .addReg(ARM::SP, RegState::Define) - .addReg(ARM::SP); - if (!Subtarget.isThumb()) - MIB.addReg(0); - MIB.addImm(Subtarget.getStackAlignment().value()).add(predOps(ARMCC::AL)); -} +void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB, + MachineBasicBlock::iterator It, + bool CFI, bool Auth) const { + int Align = Subtarget.getStackAlignment().value(); + if (Auth) { + assert(Subtarget.isThumb2()); + // Restore return address PAC and LR. + BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST)) + .addReg(ARM::R12, RegState::Define) + .addReg(ARM::LR, RegState::Define) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP) + .addImm(Align) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameDestroy); + // LR authentication is after the CFI instructions, below. + } else { + unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; + MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::SP); + if (!Subtarget.isThumb()) + MIB.addReg(0); + MIB.addImm(Subtarget.getStackAlignment().value()) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameDestroy); + } -void ARMBaseInstrInfo::emitCFIForLRRestoreFromStack( - MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const { - // Now stack has moved back up...
- MachineFunction &MF = *MBB.getParent(); - const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); - unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); - int64_t StackPosEntry = - MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); - BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) - .addCFIIndex(StackPosEntry) - .setMIFlags(MachineInstr::FrameDestroy); + if (CFI) { + // Now stack has moved back up... + MachineFunction &MF = *MBB.getParent(); + const MCRegisterInfo *MRI = Subtarget.getRegisterInfo(); + unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true); + int64_t StackPosEntry = + MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(StackPosEntry) + .setMIFlags(MachineInstr::FrameDestroy); + + // ... and we have restored LR. + int64_t LRPosEntry = + MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(LRPosEntry) + .setMIFlags(MachineInstr::FrameDestroy); + + if (Auth) { + unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true); + int64_t Entry = + MF.addFrameInst(MCCFIInstruction::createUndefined(nullptr, DwarfRAC)); + BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) + .addCFIIndex(Entry) + .setMIFlags(MachineInstr::FrameDestroy); + } + } - // ... and we have restored LR. - int64_t LRPosEntry = - MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR)); - BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION)) - .addCFIIndex(LRPosEntry) - .setMIFlags(MachineInstr::FrameDestroy); + if (Auth) + BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT)); } void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg( @@ -6500,8 +6594,11 @@ MBB.addLiveIn(ARM::LR); // Insert a save before the outlined region - saveLROnStack(MBB, It); - emitCFIForLRSaveOnStack(MBB, It); + bool Auth = OF.Candidates.front() + .getMF() + ->getInfo() + ->shouldSignReturnAddress(true); + saveLROnStack(MBB, It, true, Auth); // Fix up the instructions in the range, since we're going to modify the // stack. @@ -6510,8 +6607,7 @@ fixupPostOutline(MBB); // Insert a restore before the terminator for the function. Restore LR. - restoreLRFromStack(MBB, Et); - emitCFIForLRRestoreFromStack(MBB, Et); + restoreLRFromStack(MBB, Et, true, Auth); } // If this is a tail call outlined function, then there's already a return. @@ -6590,13 +6686,10 @@ // We have the default case. Save and restore from SP. 
if (!MBB.isLiveIn(ARM::LR)) MBB.addLiveIn(ARM::LR); - saveLROnStack(MBB, It); - if (!AFI.isLRSpilled()) - emitCFIForLRSaveOnStack(MBB, It); + bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true); + saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth); CallPt = MBB.insert(It, CallMIB); - restoreLRFromStack(MBB, It); - if (!AFI.isLRSpilled()) - emitCFIForLRRestoreFromStack(MBB, It); + restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth); It--; return CallPt; } diff --git a/llvm/lib/Target/ARM/ARMBranchTargets.cpp b/llvm/lib/Target/ARM/ARMBranchTargets.cpp --- a/llvm/lib/Target/ARM/ARMBranchTargets.cpp +++ b/llvm/lib/Target/ARM/ARMBranchTargets.cpp @@ -108,6 +108,7 @@ bool IsFirstBB) { // Which instruction to insert: BTI or PACBTI unsigned OpCode = ARM::t2BTI; + unsigned MIFlags = 0; // Skip meta instructions, including EH labels auto MBBI = llvm::find_if_not(MBB.instrs(), [](const MachineInstr &MI) { @@ -121,6 +122,7 @@ LLVM_DEBUG(dbgs() << "Removing a 'PAC' instr from BB '" << MBB.getName() << "' to replace with PACBTI\n"); OpCode = ARM::t2PACBTI; + MIFlags = MachineInstr::FrameSetup; auto NextMBBI = std::next(MBBI); MBBI->eraseFromParent(); MBBI = NextMBBI; @@ -131,5 +133,6 @@ << (OpCode == ARM::t2BTI ? "BTI" : "PACBTI") << "' instr into BB '" << MBB.getName() << "'\n"); // Finally, insert a new instruction (either PAC or PACBTI) - BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII.get(OpCode)); + BuildMI(MBB, MBBI, MBB.findDebugLoc(MBBI), TII.get(OpCode)) + .setMIFlags(MIFlags); } diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp --- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -2160,6 +2160,11 @@ return true; } case ARM::tBXNS_RET: { + // For v8.0-M.Main we need to authenticate LR before clearing FPRs, which + // uses R12 as a scratch register. + if (!STI->hasV8_1MMainlineOps() && AFI->shouldSignReturnAddress()) + BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARM::t2AUT)); + MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI); if (STI->hasV8_1MMainlineOps()) { @@ -2169,6 +2174,9 @@ .addReg(ARM::SP) .addImm(4) .add(predOps(ARMCC::AL)); + + if (AFI->shouldSignReturnAddress()) + BuildMI(AfterBB, AfterBB.end(), DebugLoc(), TII->get(ARM::t2AUT)); } // Clear all GPR that are not a use of the return instruction. diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -503,20 +503,12 @@ StackAdjustingInsts DefCFAOffsetCandidates; bool HasFP = hasFP(MF); - // Allocate the vararg register save area. - if (ArgRegsSaveSize) { - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, - MachineInstr::FrameSetup); - DefCFAOffsetCandidates.addInst(std::prev(MBBI), ArgRegsSaveSize, true); - } - if (!AFI->hasStackFrame() && (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) { - if (NumBytes - ArgRegsSaveSize != 0) { - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -(NumBytes - ArgRegsSaveSize), + if (NumBytes != 0) { + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, MachineInstr::FrameSetup); - DefCFAOffsetCandidates.addInst(std::prev(MBBI), - NumBytes - ArgRegsSaveSize, true); + DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true); } DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP); return; @@ -562,13 +554,26 @@ } } - // Move past FPCXT area. 
MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; + + // Move past the PAC computation. + if (AFI->shouldSignReturnAddress()) + LastPush = MBBI++; + + // Move past FPCXT area. if (FPCXTSaveSize > 0) { LastPush = MBBI++; DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true); } + // Allocate the vararg register save area. + if (ArgRegsSaveSize) { + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize, + MachineInstr::FrameSetup); + LastPush = std::prev(MBBI); + DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true); + } + // Move past area 1. if (GPRCS1Size > 0) { GPRCS1Push = LastPush = MBBI++; @@ -788,7 +793,8 @@ case ARM::R11: case ARM::R12: if (STI.splitFramePushPop(MF)) { - unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); + unsigned DwarfReg = MRI->getDwarfRegNum( + Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true); unsigned Offset = MFI.getObjectOffset(FI); unsigned CFIIndex = MF.addFrameInst( MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); @@ -923,8 +929,9 @@ DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); if (!AFI->hasStackFrame()) { - if (NumBytes - ReservedArgStack != 0) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes - ReservedArgStack, + if (NumBytes + IncomingArgStackToRestore != 0) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, + NumBytes + IncomingArgStackToRestore, MachineInstr::FrameDestroy); } else { // Unwind MBBI to point to first LDR / VLDRD. @@ -1007,15 +1014,21 @@ if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; - if (AFI->getFPCXTSaveAreaSize()) MBBI++; - } - if (ReservedArgStack || IncomingArgStackToRestore) { - assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 && - "attempting to restore negative stack amount"); - emitSPUpdate(isARM, MBB, MBBI, dl, TII, - ReservedArgStack + IncomingArgStackToRestore, - MachineInstr::FrameDestroy); + if (ReservedArgStack || IncomingArgStackToRestore) { + assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 && + "attempting to restore negative stack amount"); + emitSPUpdate(isARM, MBB, MBBI, dl, TII, + ReservedArgStack + IncomingArgStackToRestore, + MachineInstr::FrameDestroy); + } + + // Validate PAC, It should have been already popped into R12. For CMSE entry + // function, the validation instruction is emitted during expansion of the + // tBXNS_RET, since the validation must use the value of SP at function + // entry, before saving, resp. after restoring, FPCXTNS. + if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction()) + BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT)); } } @@ -1199,6 +1212,7 @@ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); ARMFunctionInfo *AFI = MF.getInfo(); + bool hasPAC = AFI->shouldSignReturnAddress(); DebugLoc DL; bool isTailCall = false; bool isInterrupt = false; @@ -1231,7 +1245,7 @@ continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 && - STI.hasV5TOps() && MBB.succ_empty()) { + STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) { Reg = ARM::PC; // Fold the return instruction into the LDM. DeleteRet = true; @@ -1580,6 +1594,11 @@ ARM::t2STR_PRE : ARM::STR_PRE_IMM; unsigned FltOpc = ARM::VSTMDDB_UPD; unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs(); + // Compute PAC in R12. 
+ if (AFI->shouldSignReturnAddress()) { + BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC)) + .setMIFlags(MachineInstr::FrameSetup); + } // Save the non-secure floating point context. if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) { return C.getReg() == ARM::FPCXTNS; @@ -1789,6 +1808,13 @@ MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) return false; + // We are disabling shrinkwrapping for now when PAC is enabled, as + // shrinkwrapping can cause clobbering of r12 when the PAC code is + // generated. A follow-up patch will fix this in a more performant manner. + if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress( + false /*SpillsLR */)) + return false; + return true; } @@ -2315,6 +2341,26 @@ CSI.back().setRestored(false); } + // For functions that sign their return address, upon function entry, the + // return address PAC is computed in R12. Treat R12 as a callee-saved register + // in this case. + const auto &AFI = *MF.getInfo<ARMFunctionInfo>(); + if (AFI.shouldSignReturnAddress()) { + // The order of registers must match the order we push them, because the + // PEI assigns frame indices in that order. When compiling for return + // address sign and authentication, we use split push, therefore the orders + // we want are: + // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8 + CSI.insert(find_if(CSI, + [=](const auto &CS) { + unsigned Reg = CS.getReg(); + return Reg == ARM::R10 || Reg == ARM::R11 || + Reg == ARM::R8 || Reg == ARM::R9 || + ARM::DPRRegClass.contains(Reg); + }), + CalleeSavedInfo(ARM::R12)); + } + return false; } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -2952,9 +2952,17 @@ // Indirect tail calls cannot be optimized for Thumb1 if the args // to the call take up r0-r3. The reason is that there are no legal registers // left to hold the pointer to the function to be called. - if (Subtarget->isThumb1Only() && Outs.size() >= 4 && - (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) - return false; + // Similarly, if the function uses return address sign and authentication, + // r12 is needed to hold the PAC and is not available to hold the callee + // address. + if (Outs.size() >= 4 && + (!isa<GlobalAddressSDNode>(Callee.getNode()) || isIndirect)) { + if (Subtarget->isThumb1Only()) + return false; + // Conservatively assume the function spills LR. + if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(true)) + return false; + } // Look for obvious safe cases to perform tail call optimization that do not // require ABI changes. This is what gcc calls sibcall.
diff --git a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp --- a/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -2121,7 +2121,7 @@ bool Modified = false; for (MachineBasicBlock &MBB : Fn) { Modified |= LoadStoreMultipleOpti(MBB); - if (STI->hasV5TOps()) + if (STI->hasV5TOps() && !AFI->shouldSignReturnAddress()) Modified |= MergeReturnIntoLDM(MBB); if (isThumb1) Modified |= CombineMovBx(MBB); diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -289,7 +289,7 @@ return false; if (SignReturnAddressAll) return true; - return LRSpilled; + return SpillsLR; } bool branchTargetEnforcement() const { return BranchTargetEnforcement; } diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td --- a/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -211,6 +211,8 @@ def ZR : ARMReg<15, "zr">, DwarfRegNum<[15]>; +def RA_AUTH_CODE : ARMReg<12, "ra_auth_code">, DwarfRegNum<[143]>; + // Register classes. // // pc == Program Counter diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -18,6 +18,7 @@ #include "ARMConstantPoolValue.h" #include "ARMFrameLowering.h" #include "ARMISelLowering.h" +#include "ARMMachineFunctionInfo.h" #include "ARMSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -844,6 +845,8 @@ /// to lr. This is always required on Thumb1-only targets, as the push and /// pop instructions can't access the high registers. bool splitFramePushPop(const MachineFunction &MF) const { + if (MF.getInfo()->shouldSignReturnAddress()) + return true; return (getFramePointerReg() == ARM::R7 && MF.getTarget().Options.DisableFramePointerElim(MF)) || isThumb1Only(); diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp --- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -452,7 +452,8 @@ int tryParseRegister(); bool tryParseRegisterWithWriteBack(OperandVector &); int tryParseShiftRegister(OperandVector &); - bool parseRegisterList(OperandVector &, bool EnforceOrder = true); + bool parseRegisterList(OperandVector &, bool EnforceOrder = true, + bool AllowRAAC = false); bool parseMemory(OperandVector &); bool parseOperand(OperandVector &, StringRef Mnemonic); bool parsePrefix(ARMMCExpr::VariantKind &RefKind); @@ -4464,8 +4465,8 @@ } /// Parse a register list. -bool ARMAsmParser::parseRegisterList(OperandVector &Operands, - bool EnforceOrder) { +bool ARMAsmParser::parseRegisterList(OperandVector &Operands, bool EnforceOrder, + bool AllowRAAC) { MCAsmParser &Parser = getParser(); if (Parser.getTok().isNot(AsmToken::LCurly)) return TokError("Token is not a Left Curly Brace"); @@ -4478,7 +4479,8 @@ int Reg = tryParseRegister(); if (Reg == -1) return Error(RegLoc, "register expected"); - + if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE) + return Error(RegLoc, "pseudo-register not allowed"); // The reglist instructions have at most 16 registers, so reserve // space for that many. 
int EReg = 0; @@ -4492,7 +4494,8 @@ ++Reg; } const MCRegisterClass *RC; - if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) + if (Reg == ARM::RA_AUTH_CODE || + ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) RC = &ARMMCRegisterClasses[ARM::GPRRegClassID]; else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) RC = &ARMMCRegisterClasses[ARM::DPRRegClassID]; @@ -4513,11 +4516,15 @@ while (Parser.getTok().is(AsmToken::Comma) || Parser.getTok().is(AsmToken::Minus)) { if (Parser.getTok().is(AsmToken::Minus)) { + if (Reg == ARM::RA_AUTH_CODE) + return Error(RegLoc, "pseudo-register not allowed"); Parser.Lex(); // Eat the minus. SMLoc AfterMinusLoc = Parser.getTok().getLoc(); int EndReg = tryParseRegister(); if (EndReg == -1) return Error(AfterMinusLoc, "register expected"); + if (EndReg == ARM::RA_AUTH_CODE) + return Error(AfterMinusLoc, "pseudo-register not allowed"); // Allow Q regs and just interpret them as the two D sub-registers. if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg)) EndReg = getDRegFromQReg(EndReg) + 1; @@ -4526,7 +4533,9 @@ if (Reg == EndReg) continue; // The register must be in the same register class as the first. - if (!RC->contains(EndReg)) + if ((Reg == ARM::RA_AUTH_CODE && + RC != &ARMMCRegisterClasses[ARM::GPRRegClassID]) || + (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg))) return Error(AfterMinusLoc, "invalid register in register list"); // Ranges must go from low to high. if (MRI->getEncodingValue(Reg) > MRI->getEncodingValue(EndReg)) @@ -4551,13 +4560,15 @@ Reg = tryParseRegister(); if (Reg == -1) return Error(RegLoc, "register expected"); + if (!AllowRAAC && Reg == ARM::RA_AUTH_CODE) + return Error(RegLoc, "pseudo-register not allowed"); // Allow Q regs and just interpret them as the two D sub-registers. bool isQReg = false; if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { Reg = getDRegFromQReg(Reg); isQReg = true; } - if (!RC->contains(Reg) && + if (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg) && RC->getID() == ARMMCRegisterClasses[ARM::GPRRegClassID].getID() && ARMMCRegisterClasses[ARM::GPRwithAPSRnospRegClassID].contains(Reg)) { // switch the register classes, as GPRwithAPSRnospRegClassID is a partial @@ -4577,7 +4588,9 @@ continue; } // The register must be in the same register class as the first. - if (!RC->contains(Reg)) + if ((Reg == ARM::RA_AUTH_CODE && + RC != &ARMMCRegisterClasses[ARM::GPRRegClassID]) || + (Reg != ARM::RA_AUTH_CODE && !RC->contains(Reg))) return Error(RegLoc, "invalid register in register list"); // In most cases, the list must be monotonically increasing. 
An // exception is CLRM, which is order-independent anyway, so @@ -11685,7 +11698,7 @@ SmallVector, 1> Operands; // Parse the register list - if (parseRegisterList(Operands) || + if (parseRegisterList(Operands, true, true) || parseToken(AsmToken::EndOfStatement, "unexpected token in directive")) return true; ARMOperand &Op = (ARMOperand &)*Operands[0]; diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -1289,34 +1289,65 @@ PendingOffset -= Offset; } -void ARMELFStreamer::emitRegSave(const SmallVectorImpl &RegList, - bool IsVector) { - // Collect the registers in the register list - unsigned Count = 0; +static std::pair +collectHWRegs(const MCRegisterInfo &MRI, unsigned Idx, + const SmallVectorImpl &RegList, bool IsVector, + uint32_t &Mask_) { uint32_t Mask = 0; - const MCRegisterInfo *MRI = getContext().getRegisterInfo(); - for (size_t i = 0; i < RegList.size(); ++i) { - unsigned Reg = MRI->getEncodingValue(RegList[i]); + unsigned Count = 0; + while (Idx > 0) { + unsigned Reg = RegList[Idx - 1]; + if (Reg == ARM::RA_AUTH_CODE) + break; + Reg = MRI.getEncodingValue(Reg); assert(Reg < (IsVector ? 32U : 16U) && "Register out of range"); unsigned Bit = (1u << Reg); if ((Mask & Bit) == 0) { Mask |= Bit; ++Count; } + --Idx; } - // Track the change the $sp offset: For the .save directive, the - // corresponding push instruction will decrease the $sp by (4 * Count). - // For the .vsave directive, the corresponding vpush instruction will - // decrease $sp by (8 * Count). - SPOffset -= Count * (IsVector ? 8 : 4); + Mask_ = Mask; + return {Idx, Count}; +} - // Emit the opcode - FlushPendingOffset(); - if (IsVector) - UnwindOpAsm.EmitVFPRegSave(Mask); - else - UnwindOpAsm.EmitRegSave(Mask); +void ARMELFStreamer::emitRegSave(const SmallVectorImpl &RegList, + bool IsVector) { + uint32_t Mask; + unsigned Idx, Count; + const MCRegisterInfo &MRI = *getContext().getRegisterInfo(); + + // Collect the registers in the register list. Issue unwinding instructions in + // three parts: ordinary hardware registers, return address authentication + // code pseudo register, the rest of the registers. The RA PAC is kept in an + // architectural register (usually r12), but we treat it as a special case in + // order to distinguish between that register containing RA PAC or a general + // value. + Idx = RegList.size(); + while (Idx > 0) { + std::tie(Idx, Count) = collectHWRegs(MRI, Idx, RegList, IsVector, Mask); + if (Count) { + // Track the change the $sp offset: For the .save directive, the + // corresponding push instruction will decrease the $sp by (4 * Count). + // For the .vsave directive, the corresponding vpush instruction will + // decrease $sp by (8 * Count). + SPOffset -= Count * (IsVector ? 
8 : 4); + + // Emit the opcode + FlushPendingOffset(); + if (IsVector) + UnwindOpAsm.EmitVFPRegSave(Mask); + else + UnwindOpAsm.EmitRegSave(Mask); + } else if (Idx > 0 && RegList[Idx - 1] == ARM::RA_AUTH_CODE) { + --Idx; + SPOffset -= 4; + FlushPendingOffset(); + UnwindOpAsm.EmitRegSave(0); + } + } } void ARMELFStreamer::emitUnwindRaw(int64_t Offset, diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp --- a/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp +++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp @@ -64,8 +64,11 @@ } // end anonymous namespace void UnwindOpcodeAssembler::EmitRegSave(uint32_t RegSave) { - if (RegSave == 0u) + if (RegSave == 0u) { + // That's the special case for RA PAC. + EmitInt8(ARM::EHABI::UNWIND_OPCODE_POP_RA_AUTH_CODE); return; + } // One byte opcode to save register r14 and r11-r4 if (RegSave & (1u << 4)) { diff --git a/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll b/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll --- a/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll +++ b/llvm/test/CodeGen/ARM/2012-10-04-AAPCS-byval-align8.ll @@ -10,8 +10,7 @@ ; CHECK-LABEL: test_byval_8_bytes_alignment: define void @test_byval_8_bytes_alignment(i32 %i, ...) { entry: -; CHECK: sub sp, sp, #12 -; CHECK: sub sp, sp, #4 +; CHECK: sub sp, sp, #16 ; CHECK: add r0, sp, #4 ; CHECK: stmib sp, {r1, r2, r3} %g = alloca i8* diff --git a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll --- a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll +++ b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll @@ -6,7 +6,7 @@ declare void @bar1() define void @foo()#0 { -; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpsid $itstate $pc $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 
$d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30 +; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30 call void @bar1() call void @bar2() ret void diff --git a/llvm/test/CodeGen/ARM/machine-outliner-calls.mir b/llvm/test/CodeGen/ARM/machine-outliner-calls.mir --- a/llvm/test/CodeGen/ARM/machine-outliner-calls.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-calls.mir @@ -311,7 +311,7 @@ ; CHECK-LABEL: name: OUTLINED_FUNCTION_0 ; CHECK: bb.0: ; CHECK: liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -8 ; CHECK: BL @bar, implicit-def dead $lr, implicit $sp @@ -320,13 +320,13 @@ ; CHECK: $r2 = MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r3 = MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r4 = MOVi 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr, $sp = 
LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: MOVPCLR 14 /* CC::al */, $noreg ; CHECK-LABEL: name: OUTLINED_FUNCTION_1 ; CHECK: bb.0: ; CHECK: liveins: $r11, $r10, $r9, $r8, $r7, $r6, $r5, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -8 ; CHECK: BL @bar, implicit-def dead $lr, implicit $sp @@ -335,7 +335,7 @@ ; CHECK: $r2 = MOVi 2, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r3 = MOVi 2, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r4 = MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: TAILJMPd @bar, implicit $sp ; CHECK-LABEL: name: OUTLINED_FUNCTION_2 @@ -351,27 +351,27 @@ ; CHECK-LABEL: name: OUTLINED_FUNCTION_3 ; CHECK: bb.0: ; CHECK: liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -8 ; CHECK: tBL 14 /* CC::al */, $noreg, @bar, implicit-def dead $lr, implicit $sp ; CHECK: $r0 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r1 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r2 = t2MOVi 2, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: tTAILJMPdND @bar, 14 /* CC::al */, $noreg, implicit $sp ; CHECK-LABEL: name: OUTLINED_FUNCTION_4 ; CHECK: bb.0: ; CHECK: liveins: $r11, $r10, $r9, $r8, $r6, $r5, $r4, $d15, $d14, $d13, $d12, $d11, $d10, $d9, $d8, $lr - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -8 ; CHECK: tBL 14 /* CC::al */, $noreg, @bar, implicit-def dead $lr, implicit $sp ; CHECK: $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r1 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg ; CHECK: $r2 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/machine-outliner-default.mir b/llvm/test/CodeGen/ARM/machine-outliner-default.mir --- a/llvm/test/CodeGen/ARM/machine-outliner-default.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-default.mir @@ -18,19 +18,19 @@ ; CHECK-LABEL: name: outline_default_arm ; CHECK: bb.0: ; CHECK: liveins: $lr - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: BL @OUTLINED_FUNCTION_0 - ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al 
*/, $noreg + ; CHECK: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: bb.1: ; CHECK: liveins: $lr, $r6, $r7, $r8, $r9, $r10, $r11 - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: BL @OUTLINED_FUNCTION_0 - ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: bb.2: ; CHECK: liveins: $lr, $r6, $r7, $r8, $r9, $r10, $r11 - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: BL @OUTLINED_FUNCTION_0 - ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: bb.3: ; CHECK: liveins: $lr, $r6, $r7, $r8, $r9, $r10, $r11 ; CHECK: $r2 = MOVr $lr, 14 /* CC::al */, $noreg, $noreg @@ -72,19 +72,19 @@ ; CHECK-LABEL: name: outline_default_thumb ; CHECK: bb.0: ; CHECK: liveins: $lr - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_1 - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: bb.1: ; CHECK: liveins: $lr, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11 - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_1 - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: bb.2: ; CHECK: liveins: $lr, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11 - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_1 - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: bb.3: ; CHECK: liveins: $lr, $r4, $r5, $r6, $r7, $r8, $r9, $r10, $r11 ; CHECK: $r2 = tMOVr $lr, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/machine-outliner-lr-regsave.mir b/llvm/test/CodeGen/ARM/machine-outliner-lr-regsave.mir --- a/llvm/test/CodeGen/ARM/machine-outliner-lr-regsave.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-lr-regsave.mir @@ -28,9 +28,9 @@ ; CHECK: $lr = MOVr killed $r6, 14 /* CC::al */, $noreg, $noreg ; CHECK: bb.2: ; CHECK: liveins: $lr - ; CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: BL @OUTLINED_FUNCTION_1 - ; CHECK: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ; CHECK: bb.3: ; CHECK: liveins: $lr, $r0, $r6, $r7, $r8, $r9, $r10, $r11 ; CHECK: $r6 = MOVr killed $lr, 14 /* CC::al */, 
$noreg, $noreg @@ -95,9 +95,9 @@ ; CHECK: $lr = tMOVr killed $r6, 14 /* CC::al */, $noreg ; CHECK: bb.2: ; CHECK: liveins: $lr - ; CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ; CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ; CHECK: tBL 14 /* CC::al */, $noreg, @OUTLINED_FUNCTION_0 - ; CHECK: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ; CHECK: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ; CHECK: bb.3: ; CHECK: liveins: $lr, $r0, $r6, $r7 ; CHECK: $r6 = tMOVr killed $lr, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir --- a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-arm.mir @@ -146,41 +146,41 @@ BX_RET 14, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I5]] - ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $d0 = VLDRD $sp, 2, 14 /* CC::al */, $noreg ;CHECK-NEXT: $d1 = VLDRD $sp, 10, 14 /* CC::al */, $noreg ;CHECK-NEXT: $d4 = VLDRD $sp, 255, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I5FP16]] - ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $s1 = VLDRH $sp, 4, 14, $noreg ;CHECK-NEXT: $s2 = VLDRH $sp, 12, 14, $noreg ;CHECK-NEXT: $s5 = VLDRH $sp, 244, 14, $noreg - ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I12]] - ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $r1 = LDRi12 $sp, 8, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r2 = LDRi12 $sp, 16, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r5 = LDRi12 $sp, 4094, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I3]] - ;CHECK: early-clobber $sp = STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup STR_PRE_IMM killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: BL @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $r1 = LDRSH $sp, 
$noreg, 8, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r2 = LDRSH $sp, $noreg, 16, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r5 = LDRSH $sp, $noreg, 255, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy LDR_POST_IMM $sp, $noreg, 8, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir --- a/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir +++ b/llvm/test/CodeGen/ARM/machine-outliner-stack-fixup-thumb.mir @@ -181,51 +181,51 @@ BX_RET 14, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[LDREX]] - ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $r1 = t2LDREX $sp, 2, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r1 = t2LDREX $sp, 10, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r1 = t2LDREX $sp, 255, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I8]] - ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: t2STRHT $r0, $sp, 8, 14 /* CC::al */, $noreg ;CHECK-NEXT: t2STRHT $r0, $sp, 12, 14 /* CC::al */, $noreg ;CHECK-NEXT: t2STRHT $r0, $sp, 255, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I8S4]] - ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 8, 14 /* CC::al */, $noreg ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 16, 14 /* CC::al */, $noreg ;CHECK-NEXT: t2STRDi8 $r0, $r1, $sp, 1020, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[I12]] - ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 8, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 12, 14 /* CC::al */, $noreg ;CHECK-NEXT: $r0 = t2LDRi12 $sp, 4094, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = 
t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg ;CHECK: name: OUTLINED_FUNCTION_[[T1_S]] - ;CHECK: early-clobber $sp = t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg + ;CHECK: early-clobber $sp = frame-setup t2STR_PRE killed $lr, $sp, -8, 14 /* CC::al */, $noreg ;CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 8 ;CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -8 ;CHECK-NEXT: tBL 14 /* CC::al */, $noreg, @foo, implicit-def dead $lr, implicit $sp ;CHECK-NEXT: tSTRspi $r0, $sp, 2, 14 /* CC::al */, $noreg ;CHECK-NEXT: tSTRspi $r0, $sp, 6, 14 /* CC::al */, $noreg ;CHECK-NEXT: tSTRspi $r0, $sp, 255, 14 /* CC::al */, $noreg - ;CHECK-NEXT: $lr, $sp = t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg + ;CHECK-NEXT: $lr, $sp = frame-destroy t2LDR_POST $sp, 8, 14 /* CC::al */, $noreg diff --git a/llvm/test/CodeGen/ARM/va_arg.ll b/llvm/test/CodeGen/ARM/va_arg.ll --- a/llvm/test/CodeGen/ARM/va_arg.ll +++ b/llvm/test/CodeGen/ARM/va_arg.ll @@ -5,10 +5,8 @@ define i64 @test1(i32 %i, ...) nounwind optsize { ; CHECK-LABEL: test1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #12 -; CHECK-NEXT: sub sp, sp, #12 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: add r0, sp, #4 ; CHECK-NEXT: stmib sp, {r1, r2, r3} ; CHECK-NEXT: add r0, r0, #7 @@ -19,8 +17,7 @@ ; CHECK-NEXT: add r2, r1, #8 ; CHECK-NEXT: str r2, [sp] ; CHECK-NEXT: ldr r1, [r1, #4] -; CHECK-NEXT: add sp, sp, #4 -; CHECK-NEXT: add sp, sp, #12 +; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: bx lr entry: %g = alloca i8*, align 4 @@ -34,10 +31,8 @@ define double @test2(i32 %a, i32* %b, ...) nounwind optsize { ; CHECK-LABEL: test2: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, sp, #8 -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, sp, #4 +; CHECK-NEXT: .pad #12 +; CHECK-NEXT: sub sp, sp, #12 ; CHECK-NEXT: add r0, sp, #4 ; CHECK-NEXT: stmib sp, {r2, r3} ; CHECK-NEXT: add r0, r0, #11 @@ -47,8 +42,7 @@ ; CHECK-NEXT: str r1, [sp] ; CHECK-NEXT: vldr d16, [r0] ; CHECK-NEXT: vmov r0, r1, d16 -; CHECK-NEXT: add sp, sp, #4 -; CHECK-NEXT: add sp, sp, #8 +; CHECK-NEXT: add sp, sp, #12 ; CHECK-NEXT: bx lr entry: %ap = alloca i8*, align 4 ; [#uses=3] diff --git a/llvm/test/CodeGen/ARM/vargs_align.ll b/llvm/test/CodeGen/ARM/vargs_align.ll --- a/llvm/test/CodeGen/ARM/vargs_align.ll +++ b/llvm/test/CodeGen/ARM/vargs_align.ll @@ -22,8 +22,7 @@ ; EABI: add sp, sp, #4 ; EABI: add sp, sp, #12 -; OABI: add sp, sp, #12 -; OABI: add sp, sp, #12 +; OABI: add sp, sp, #24 } declare void @llvm.va_start(i8*) nounwind diff --git a/llvm/test/CodeGen/Thumb2/bti-pac-replace-1.mir b/llvm/test/CodeGen/Thumb2/bti-pac-replace-1.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/bti-pac-replace-1.mir @@ -0,0 +1,96 @@ +# RUN: llc --run-pass=arm-branch-targets %s -o - | FileCheck %s +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-unknown-eabi" + + define hidden i32 @_Z1fi(i32 %x) { + entry: + %add = add nsw i32 %x, 1 + %call = tail call i32 @_Z1gi(i32 %add) + %sub = add nsw i32 %call, -1 + ret i32 %sub + } + + declare dso_local i32 @_Z1gi(i32) + + !llvm.module.flags = !{!0, !1, !2} + + !0 = !{i32 1, !"branch-target-enforcement", i32 1} + !1 = !{i32 1, !"sign-return-address", i32 1} + !2 = !{i32 1, !"sign-return-address-all", i32 0} + +... 
+--- +name: _Z1fi +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r12', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + liveins: $r0, $r7, $lr, $r12 + + frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp + frame-setup tPUSH 14 /* CC::al */, $noreg, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 8 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 12 + frame-setup CFI_INSTRUCTION offset $ra_auth_code, -12 + $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg + frame-setup CFI_INSTRUCTION def_cfa_offset 16 + renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @_Z1gi, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 + renamable $r0, dead $cpsr = nsw tSUBi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg + $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr + t2AUT implicit $r12, implicit $lr, implicit $sp + tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 + +... 
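+# In the body above, r12 carries the authentication code: t2PAC computes it,
+# t2STR_PRE spills it (tracked in the CFI as $ra_auth_code), and t2LDR_POST
+# reloads it for t2AUT just before the return.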
+# Check PAC is replaces with PACBTI and it has the frame-setup flag +# CHECK-LABEL: bb.0.entry: +# CHECK: frame-setup t2PACBTI \ No newline at end of file diff --git a/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/bti-pac-replace-2.ll @@ -0,0 +1,43 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf -u - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-unknown-eabi" + +define hidden i32 @_Z1fi(i32 %x) { +entry: + %add = add nsw i32 %x, 1 + %call = tail call i32 @_Z1gi(i32 %add) + %sub = add nsw i32 %call, -1 + ret i32 %sub +} + +declare dso_local i32 @_Z1gi(i32) + +!llvm.module.flags = !{!0, !1, !2} +!0 = !{i32 1, !"branch-target-enforcement", i32 1} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + +; Check the function starts with `pacbti` and correct unwind info is emitted +; CHECK-LABEL: _Z1fi: +; ... +; CHECK: pacbti r12, lr, sp +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -12 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; ... + +; UNWIND-LABEL: Opcodes [ +; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} +; UNWIND-NEXT: 0xB0 ; finish diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-basic.ll @@ -0,0 +1,124 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8m.main-none-none-eabi" + +; int g(int); +; +; #if __ARM_FEATURE_CMSE == 3 +; #define ENTRY __attribute__((cmse_nonsecure_entry)) +; #else +; #define ENTRY +; #endif +; +; ENTRY int f(int x) { +; return 1 + g(x - 1); +; } + +define hidden i32 @f0(i32 %x) local_unnamed_addr { +entry: + %sub = add nsw i32 %x, -1 + %call = tail call i32 @g(i32 %sub) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +; CHECK-LABEL: f0: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -12 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r7, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + +define hidden i32 @f1(i32 %x) local_unnamed_addr #0 { +entry: + %sub = add nsw i32 %x, -1 + %call = tail call i32 @g(i32 %sub) + %add = add nsw i32 %call, 1 + ret i32 %add +} + +; CHECK-LABEL: f1: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: vstr fpcxtns, [sp, #-4]! 
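+; f1 is a CMSE nonsecure entry function, so the non-secure FP context
+; (fpcxtns) is saved right after pac and restored before aut.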
+; CHECK-NEXT: .cfi_def_cfa_offset 4 +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK: vldr fpcxtns, [sp], #4 +; CHECK: aut r12, lr, sp + +define hidden i32 @f2(i32 %x) local_unnamed_addr #1 { +entry: + %sub = add nsw i32 %x, -1 + %call = tail call i32 @g(i32 %sub) + %add = add nsw i32 %call, 1 + ret i32 %add +} +; CHECK-LABEL: f2: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -12 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r7, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: mrs r12, control +; ... +; CHECK: bxns lr + +declare dso_local i32 @g(i32) local_unnamed_addr + +attributes #0 = { "cmse_nonsecure_entry" "target-features"="+8msecext,+armv8.1-m.main"} +attributes #1 = { "cmse_nonsecure_entry" "target-features"="+8msecext,+armv8-m.main,+fp-armv8d16"} + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + +; UNWIND-LABEL: FunctionAddress: 0x0 +; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} +; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND-LABEL: FunctionAddress: 0x24 +; UNWIND: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} + +; UNWIND-LABEL: FunctionAddress: 0x54 +; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} +; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND-LABEL: 00000001 {{.*}} f0 +; UNWIND-LABEL: 00000025 {{.*}} f1 +; UNWIND-LABEL: 00000055 {{.*}} f2 diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-indirect-tail-call.ll @@ -0,0 +1,37 @@ +; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK1 +; RUN: llc %s -o - | FileCheck %s --check-prefix=CHECK2 +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-unknown-eabi" + +@p = hidden local_unnamed_addr global i32 (i32, i32, i32, i32)* null, align 4 + +define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %call = tail call i32 @g(i32 %a) #0 + %0 = load i32 (i32, i32, i32, i32)*, i32 (i32, i32, i32, i32)** @p, align 4 + %call1 = tail call i32 %0(i32 %call, i32 %b, i32 %c, i32 %d) #0 + ret i32 %call1 +} + +; CHECK1-LABEL: f +; ... +; CHECK1: aut r12, lr, sp +; CHECK1-NOT: bx r12 + +; CHECK2-LABEL: f +; ... 
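+; r12 is needed to carry the authentication code into the epilogue, so the
+; call through the loaded function pointer must not go via r12; as checked
+; below it is made through r4 before the normal aut/bx lr return.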
+; CHECK2: blx r4 +; CHECK2-NEXT: ldr r12, [sp], #4 +; CHECK2-NEXT: pop.w {r4, r5, r6, r7, lr} +; CHECK2-NEXT: aut r12, lr, sp +; CHECK2-NEXT: bx lr + +declare dso_local i32 @g(i32) local_unnamed_addr #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-1.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-1.ll @@ -0,0 +1,144 @@ +; RUN: llc --force-dwarf-frame-section --exception-model=arm %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s --exception-model=arm -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + +; Triple tweaked so we get 16-byte stack alignment and better test coverage. +target triple = "armv7m-none-nacl-android" + +; -Oz +; volatile int a, b, c, d, e, f, g, h, i; +; +; int x() { +; int r = (a + b) / (c + d) + e + f / g + h + i; +; return r + 1; +; } +; +; int y() { +; int r = (a + b) / (c + d) + e + f / g + h + i; +; return r + 2; +; } + +@a = hidden global i32 0, align 4 +@b = hidden global i32 0, align 4 +@c = hidden global i32 0, align 4 +@d = hidden global i32 0, align 4 +@e = hidden global i32 0, align 4 +@f = hidden global i32 0, align 4 +@g = hidden global i32 0, align 4 +@h = hidden global i32 0, align 4 +@i = hidden global i32 0, align 4 + + +define hidden i32 @x() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %6 = load volatile i32, i32* @g, align 4 + %div3 = sdiv i32 %5, %6 + %7 = load volatile i32, i32* @h, align 4 + %8 = load volatile i32, i32* @i, align 4 + %add2 = add i32 %div, 1 + %add4 = add i32 %add2, %4 + %add5 = add i32 %add4, %div3 + %add6 = add i32 %add5, %7 + %add7 = add i32 %add6, %8 + ret i32 %add7 +} + +; CHECK-LABEL: x: +; CHECK: ldr r0, .LCPI0_0 +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: .save {ra_auth_code, lr} +; CHECK-NEXT: strd r12, lr, [sp, #-16]! 
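+; The strd above writes back 16 bytes in one go: 8 for {ra_auth_code, lr} and
+; 8 more of alignment padding, matching the .pad #8 and .save directives.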
+; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -16 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: ldrd r12, lr, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore lr +; CHECK-NEXT: .cfi_undefined ra_auth_code +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: bx lr + +define hidden i32 @y() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %6 = load volatile i32, i32* @g, align 4 + %div3 = sdiv i32 %5, %6 + %7 = load volatile i32, i32* @h, align 4 + %8 = load volatile i32, i32* @i, align 4 + %add2 = add i32 %div, 2 + %add4 = add i32 %add2, %4 + %add5 = add i32 %add4, %div3 + %add6 = add i32 %add5, %7 + %add7 = add i32 %add6, %8 + ret i32 %add7 +} +; CHECK-LABEL: y: +; CHECK: ldr r0, .LCPI1_0 +; CHECK-NEXT: pac r12, lr, sp +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: .save {ra_auth_code, lr} +; CHECK-NEXT: strd r12, lr, [sp, #-16]! +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -16 +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: ldrd r12, lr, [sp], #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore lr +; CHECK-NEXT: .cfi_undefined ra_auth_code +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK-NOT: pac +; CHECK-NOT: aut +; CHECK-NOT: r12 +; CHECK: bx lr + +attributes #0 = { minsize nofree norecurse nounwind optsize uwtable} + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + +; UNWIND-LABEL: FunctionAddress: 0x0 +; UNWIND: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} +; UNWIND-NEXT: 0x01 ; vsp = vsp + 8 +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND-LABEL: FunctionAddress: 0x20 +; UNWIND: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x00 ; pop {lr} +; UNWIND-NEXT: 0x01 ; vsp = vsp + 8 +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND-LABEL: FunctionAddress: 0x40 +; UNWIND: Model: CantUnwind + +; UNWINND-LABEL: 00000041 {{.*}} OUTLINED_FUNCTION_0 +; UNWINND-LABEL: 00000001 {{.*}} x +; UNWINND-LABEL: 00000021 {{.*}} y diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-2.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-2.ll @@ -0,0 +1,89 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; -Oz +; volatile int a, b, c, d, e, f; +; +; int x() { +; int r = a + b + c + d + e + f; +; return r + 1; +; } +; +; int y() { +; int r = a + b + c + d + e + f; +; return r + 2; +; } + +@a = hidden global i32 0, align 4 +@b = hidden global i32 0, align 4 +@c = hidden global i32 0, align 4 +@d = hidden global i32 0, align 4 +@e = hidden global i32 0, align 4 +@f = hidden global i32 0, align 4 + +define hidden i32 @x() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %2 = load 
volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %add = add i32 %0, 1 + %add1 = add i32 %add, %1 + %add2 = add i32 %add1, %2 + %add3 = add i32 %add2, %3 + %add4 = add i32 %add3, %4 + %add5 = add i32 %add4, %5 + ret i32 %add5 +} +; CHECK-LABEL: x: +; CHECK: ldr r{{.*}}, .LCPI0_0 +; CHECK-NEXT: mov r[[A:[0-9]*]], lr +; CHECK-NEXT: .cfi_register lr, r[[A]] +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: mov lr, r[[A]] +; CHECK-NEXT: .cfi_restore lr +; CHECK-NEXT: adds r0, #1 +; CHECK-NEXT: bx lr + +define hidden i32 @y() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %add = add i32 %0, 2 + %add1 = add i32 %add, %1 + %add2 = add i32 %add1, %2 + %add3 = add i32 %add2, %3 + %add4 = add i32 %add3, %4 + %add5 = add i32 %add4, %5 + ret i32 %add5 +} +; CHECK-LABEL: y: +; CHECK: ldr r{{.*}}, .LCPI1_0 +; CHECK-NEXT: mov r[[B:[0-9]*]], lr +; CHECK-NEXT: .cfi_register lr, r[[B]] +; CHECK-NEXT: bl OUTLINED_FUNCTION_0 +; CHECK-NEXT: mov lr, r[[B]] +; CHECK-NEXT: .cfi_restore lr +; CHECK-NEXT: adds r0, #2 +; CHECK-NEXT: bx lr + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK-NOT: pac +; CHECK-NOT: aut +; CHECK-NOT: r12 +; CHECK: bx lr + +attributes #0 = { minsize nofree norecurse nounwind optsize} + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-3.ll @@ -0,0 +1,166 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; -Oz +; __attribute__((noinline)) int h(int a, int b) { return a + b; } +; +; int f(int a, int b, int c, int d) { +; if (a < 0) +; return -1; +; a = h(11 * a - b, b); +; return 2 + a * (a + b) / (c + d); +; } +; +; int g(int a, int b, int c, int d) { +; if (a < 0) +; return -1; +; a = h(11 * a - b, b); +; return 1 + a * (a + b) / (c + d); +; } + +define hidden i32 @h(i32 %a, i32 %b) local_unnamed_addr #0 { +entry: + %add = add nsw i32 %b, %a + ret i32 %add +} + +define hidden i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %cmp = icmp slt i32 %a, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %mul = mul nsw i32 %a, 11 + %sub = sub nsw i32 %mul, %b + %call = tail call i32 @h(i32 %sub, i32 %b) + %add = add nsw i32 %call, %b + %mul1 = mul nsw i32 %add, %call + %add2 = add nsw i32 %d, %c + %div = sdiv i32 %mul1, %add2 + %add3 = add nsw i32 %div, 2 + br label %return + +return: ; preds = %entry, %if.end + %retval.0 = phi i32 [ %add3, %if.end ], [ -1, %entry ] + ret i32 %retval.0 +} + +; CHECK-LABEL: f: +; CHECK: bmi .LBB +; ... 
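+; The early return for a negative argument branches past the prologue, so pac
+; and the register saves only happen on the fall-through path; the .LBB block
+; at the end returns with a plain bx lr and needs no aut.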
+; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r6, -8 +; CHECK-NEXT: .cfi_offset r5, -12 +; CHECK-NEXT: .cfi_offset r4, -16 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +; ... +; CHECK: .LBB +; CHECK: bx lr + + +define hidden i32 @g(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %cmp = icmp slt i32 %a, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %mul = mul nsw i32 %a, 11 + %sub = sub nsw i32 %mul, %b + %call = tail call i32 @h(i32 %sub, i32 %b) + %add = add nsw i32 %call, %b + %mul1 = mul nsw i32 %add, %call + %add2 = add nsw i32 %d, %c + %div = sdiv i32 %mul1, %add2 + %add3 = add nsw i32 %div, 1 + br label %return + +return: ; preds = %entry, %if.end + %retval.0 = phi i32 [ %add3, %if.end ], [ -1, %entry ] + ret i32 %retval.0 +} +; CHECK-LABEL: g: +; CHECK: bmi .LBB +; ... +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r6, -8 +; CHECK-NEXT: .cfi_offset r5, -12 +; CHECK-NEXT: .cfi_offset r4, -16 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +; ... +; CHECK: .LBB +; CHECK: bx lr + +; CHECK-LABEL: OUTLINED_FUNCTION_0: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {ra_auth_code, lr} +; CHECK-NEXT: strd r12, lr, [sp, #-8]! +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset ra_auth_code, -8 +; ... 
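+; Unlike the leaf outlined bodies in the other tests, this outlined sequence
+; contains a call of its own, so it spills lr, signs it with pac on entry, and
+; authenticates it again in the epilogue checked below.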
+; CHECK: ldrd r12, lr, [sp], #8 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore lr +; CHECK-NEXT: .cfi_undefined ra_auth_code +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + +attributes #0 = { minsize noinline norecurse nounwind optsize readnone uwtable } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + + +; UNWIND-LABEL: FunctionAddress: 0x4 +; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} + +; UNWIND-LABEL: FunctionAddress: 0x30 +; UNWIND: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} + +; UNWIND-LABEL: FunctionAddress: 0x5C +; UNWIND: Model: CantUnwind + +; UNWIND-LABEL: 0000005d {{.*}} OUTLINED_FUNCTION_0 +; UNWIND-LABEL: 00000005 {{.*}} f +; UNWIND-LABEL: 00000031 {{.*}} g diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-4.ll @@ -0,0 +1,219 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf -s --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; C++, -Oz +; __attribute__((noinline)) int h(int a, int b) { +; if (a < 0) +; throw 1; +; return a + b; +; } +; +; int f(int a, int b, int c, int d) { +; if (a < 0) +; return -1; +; a = h(a, b); +; return 2 + a * (a + b) / (c + d); +; } +; +; int g(int a, int b, int c, int d) { +; if (a < 0) +; return -1; +; a = h(a, b); +; return 1 + a * (a + b) / (c + d); +; } + +@_ZTIi = external dso_local constant i8* + +define hidden i32 @_Z1hii(i32 %a, i32 %b) local_unnamed_addr #0 { +entry: + %cmp = icmp slt i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %exception = tail call i8* @__cxa_allocate_exception(i32 4) #1 + %0 = bitcast i8* %exception to i32* + store i32 1, i32* %0, align 8 + tail call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #2 + unreachable + +if.end: ; preds = %entry + %add = add nsw i32 %b, %a + ret i32 %add +} + +; CHECK-LABEL: _Z1hii: +; ... +; CHECK: bxgt lr +; ... +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .cfi_offset ra_auth_code, -12 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; ... 
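+; The frame set up above belongs to the throwing path; __cxa_throw does not
+; return, so no aut (and no second pac) is expected before the end of the
+; function.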
+; CHECK-NOT: pac +; CHECK-NOT: aut +; CHECK: .cfi_endproc + +declare dso_local i8* @__cxa_allocate_exception(i32) local_unnamed_addr + +declare dso_local void @__cxa_throw(i8*, i8*, i8*) local_unnamed_addr + +define hidden i32 @_Z1fiiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %cmp = icmp slt i32 %a, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %call = tail call i32 @_Z1hii(i32 %a, i32 %b) + %add = add nsw i32 %call, %b + %mul = mul nsw i32 %add, %call + %add1 = add nsw i32 %d, %c + %div = sdiv i32 %mul, %add1 + %add2 = add nsw i32 %div, 2 + br label %return + +return: ; preds = %entry, %if.end + %retval.0 = phi i32 [ %add2, %if.end ], [ -1, %entry ] + ret i32 %retval.0 +} + +; CHECK-LABEL: _Z1fiiii: +; ... +; CHECK: bmi .L[[B:[a-zA-Z0-9]*]] +; ... +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r6, -8 +; CHECK-NEXT: .cfi_offset r5, -12 +; CHECK-NEXT: .cfi_offset r4, -16 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +; ... +; CHECK: .L[[B]] +; ... +; CHECK: bx lr + + + +define hidden i32 @_Z1giiii(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 { +entry: + %cmp = icmp slt i32 %a, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %call = tail call i32 @_Z1hii(i32 %a, i32 %b) + %add = add nsw i32 %call, %b + %mul = mul nsw i32 %add, %call + %add1 = add nsw i32 %d, %c + %div = sdiv i32 %mul, %add1 + %add2 = add nsw i32 %div, 1 + br label %return + +return: ; preds = %entry, %if.end + %retval.0 = phi i32 [ %add2, %if.end ], [ -1, %entry ] + ret i32 %retval.0 +} + +; CHECK-LABEL: _Z1giiii: +; ... +; CHECK: bmi .L[[B:[a-zA-Z0-9]*]] +; ... +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r6, -8 +; CHECK-NEXT: .cfi_offset r5, -12 +; CHECK-NEXT: .cfi_offset r4, -16 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr +; ... +; CHECK: .L[[B]] +; ... 
+; CHECK: bx lr + + +; CHEK-LABEL: OUTLINED_FUNCTION_0: +; CHECK-NOT: pac +; CHECK-NOT: aut +; CHECK: b _Z1hii + +attributes #0 = { minsize noinline optsize "denormal-fp-math"="preserve-sign,preserve-sign" "denormal-fp-math-f32"="ieee,ieee" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-m3" "target-features"="+armv7-m,+hwdiv,+thumb-mode" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind } +attributes #2 = { noreturn } + + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + + +; UNWIND-LABEL: FunctionAddress: 0x0 +; UNWIND: Opcodes +; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x08 ; pop {r7, lr} +; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND-LABEL: FunctionAddress: 0x2C +; UNWIND: Opcodes +; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} + +; UNWIND-LABEL: FunctionAddress: 0x62 +; UNWIND: Opcodes +; UNWIND-NEXT: 0x00 ; vsp = vsp + 4 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0xAA ; pop {r4, r5, r6, lr} + +; UNWIND-LABEL: FunctionAddress: 0x98 +; UNWIND: Opcodes +; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0xB0 ; finish +; UNWIND-NEXT: 0xB0 ; finish + +; UNWIND: 00000099 {{.*}} OUTLINED_FUNCTION_0 +; UWNIND: 0000002d {{.*}} _Z1fiiii +; UWNIND: 00000063 {{.*}} _Z1giiii +; UWNIND: 00000001 {{.*}} _Z1hii diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-outliner-5.ll @@ -0,0 +1,98 @@ +; RUN: llc %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; CHECK-LABEL: x: +; CHECK: bl OUTLINED_FUNCTION +; CHECK-LABEL: y: +; CHECK: bl OUTLINED_FUNCTION +; CHECK-LABEL: z: +; CHECK-NOT: bl OUTLINED_FUNCTION + +@a = hidden global i32 0, align 4 +@b = hidden global i32 0, align 4 +@c = hidden global i32 0, align 4 +@d = hidden global i32 0, align 4 +@e = hidden global i32 0, align 4 +@f = hidden global i32 0, align 4 +@g = hidden global i32 0, align 4 +@h = hidden global i32 0, align 4 +@i = hidden global i32 0, align 4 + +define hidden i32 @x() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %6 = load volatile i32, i32* @g, align 4 + %div3 = sdiv i32 %5, %6 + %7 = load volatile i32, i32* @h, align 4 + %8 = load volatile i32, i32* @i, align 4 + %add2 = add i32 %div, 1 + %add4 = add i32 %add2, %4 + %add5 = add i32 %add4, %div3 + %add6 = add i32 %add5, %7 + %add7 = add i32 %add6, %8 + ret i32 %add7 +} + +define hidden i32 @y() local_unnamed_addr #0 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile 
i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %6 = load volatile i32, i32* @g, align 4 + %div3 = sdiv i32 %5, %6 + %7 = load volatile i32, i32* @h, align 4 + %8 = load volatile i32, i32* @i, align 4 + %add2 = add i32 %div, 2 + %add4 = add i32 %add2, %4 + %add5 = add i32 %add4, %div3 + %add6 = add i32 %add5, %7 + %add7 = add i32 %add6, %8 + ret i32 %add7 +} + +define hidden i32 @z() local_unnamed_addr #1 { +entry: + %0 = load volatile i32, i32* @a, align 4 + %1 = load volatile i32, i32* @b, align 4 + %add = add nsw i32 %1, %0 + %2 = load volatile i32, i32* @c, align 4 + %3 = load volatile i32, i32* @d, align 4 + %add1 = add nsw i32 %3, %2 + %div = sdiv i32 %add, %add1 + %4 = load volatile i32, i32* @e, align 4 + %5 = load volatile i32, i32* @f, align 4 + %6 = load volatile i32, i32* @g, align 4 + %div3 = sdiv i32 %5, %6 + %7 = load volatile i32, i32* @h, align 4 + %8 = load volatile i32, i32* @i, align 4 + %add2 = add i32 %div, 3 + %add4 = add i32 %add2, %4 + %add5 = add i32 %add4, %div3 + %add6 = add i32 %add5, %7 + %add7 = add i32 %add6, %8 + ret i32 %add7 +} + +attributes #0 = { minsize nofree norecurse nounwind optsize } +attributes #1 = { minsize nofree norecurse nounwind optsize "sign-return-address"="none" } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-overalign.ll @@ -0,0 +1,71 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-none-eabi" + +; int g(int, int *); +; +; int f() { +; __attribute__((aligned(32))) int a[4]; +; g(4, a); +; int s = 0; +; for (int i = 0; i < 4; ++i) +; s += a[i]; +; return s; +; } + +define hidden i32 @_Z1fv() local_unnamed_addr { +entry: + %a = alloca [4 x i32], align 32 + %0 = bitcast [4 x i32]* %a to i8* + %arraydecay = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 0 + %call = call i32 @_Z1giPi(i32 4, i32* nonnull %arraydecay) + %1 = load i32, i32* %arraydecay, align 32 + %arrayidx.1 = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 1 + %2 = load i32, i32* %arrayidx.1, align 4 + %add.1 = add nsw i32 %2, %1 + %arrayidx.2 = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 2 + %3 = load i32, i32* %arrayidx.2, align 8 + %add.2 = add nsw i32 %3, %add.1 + %arrayidx.3 = getelementptr inbounds [4 x i32], [4 x i32]* %a, i32 0, i32 3 + %4 = load i32, i32* %arrayidx.3, align 4 + %add.3 = add nsw i32 %4, %add.2 + ret i32 %add.3 +} + +; CHECK-LABEL: _Z1fv: +; CHECK: pac r12, lr, sp +; CHECK: .save {r4, r6, r7, lr} +; CHECK-NEXT: push {r4, r6, r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .cfi_offset r6, -12 +; CHECK-NEXT: .cfi_offset r4, -16 +; CHECK-NEXT: .setfp r7, sp, #8 +; CHECK-NEXT: add r7, sp, #8 +; CHECK-NEXT: .cfi_def_cfa r7, 8 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! 
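+; With the CFA already redefined relative to r7, the ra_auth_code spill below
+; only needs a .cfi_offset entry and no further .cfi_def_cfa_offset.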
+; CHECK-NEXT: .cfi_offset ra_auth_code, -20 +; CHECK-NEXT: .pad #44 +; CHECK-NEXT: sub sp, #44 +; CHECK: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r6, r7, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + + +declare dso_local i32 @_Z1giPi(i32, i32*) local_unnamed_addr + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + +; UNWIND-LABEL: FunctionAddress: 0x0 +; UNWIND: 0x97 ; vsp = r7 +; UNWIND: 0x42 ; vsp = vsp - 12 +; UNWIND: 0xB4 ; pop ra_auth_code +; UNWIND: 0x84 0x0D ; pop {r4, r6, r7, lr} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-unsupported-arch.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-unsupported-arch.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-unsupported-arch.ll @@ -0,0 +1,31 @@ +; RUN: llc -mtriple thumbv6m-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple thumbv8m.base-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple thumbv7a-eabi %s -o - | FileCheck %s +; RUN: llc -mtriple thumbv7m-eabi %s -o - | FileCheck %s --check-prefix=CHECK-PACBTI + +; Check we don't emit PACBTI-M instructions for architectures +; that do not support them. +define hidden i32 @f(i32 %x) #0 { +entry: + %x.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + %0 = load i32, i32* %x.addr, align 4 + %sub = sub nsw i32 1, %0 + %call = call i32 @g(i32 %sub) + %add = add nsw i32 1, %call + ret i32 %add +} +; CHECK-LABEL: f: +; CHECK-NOT: bti + +; CHECK-PACBTI-LABEL: f: +; CHECK-PACBTI: pacbti +declare dso_local i32 @g(i32) + +attributes #0 = { noinline nounwind } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 1} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-1.ll @@ -0,0 +1,77 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-none-eabi" + +%"struct.std::__va_list" = type { i8* } + +define hidden i32 @_Z1fiz(i32 %n, ...) 
local_unnamed_addr #0 { +entry: + %ap = alloca %"struct.std::__va_list", align 4 + %0 = bitcast %"struct.std::__va_list"* %ap to i8* + call void @llvm.va_start(i8* nonnull %0) + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %1 = getelementptr inbounds %"struct.std::__va_list", %"struct.std::__va_list"* %ap, i32 0, i32 0 + %argp.cur.pre = load i8*, i8** %1, align 4 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + call void @llvm.va_end(i8* nonnull %0) + ret i32 %s.0.lcssa + +for.body: ; preds = %for.body.lr.ph, %for.body + %argp.cur = phi i8* [ %argp.cur.pre, %for.body.lr.ph ], [ %argp.next, %for.body ] + %i.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %s.08 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4 + store i8* %argp.next, i8** %1, align 4 + %2 = bitcast i8* %argp.cur to i32* + %3 = load i32, i32* %2, align 4 + %add = add nsw i32 %3, %s.08 + %inc = add nuw nsw i32 %i.09, 1 + %exitcond.not = icmp eq i32 %inc, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: _Z1fiz: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .pad #12 +; CHECK-NEXT: sub sp, #12 +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset lr, -16 +; CHECK-NEXT: .cfi_offset r7, -20 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 24 +; CHECK-NEXT: .cfi_offset ra_auth_code, -24 +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .cfi_def_cfa_offset 28 +; ... +; CHECK: add.w r[[N:[0-9]*]], sp, #16 +; CHECK: stm.w r[[N]], {r1, r2, r3} +; ... +; CHECK: add sp, #4 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r7, lr} +; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + +declare void @llvm.va_start(i8*) #1 +declare void @llvm.va_end(i8*) #1 + +attributes #0 = { nounwind optsize} +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-varargs-2.ll @@ -0,0 +1,101 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +; RUN: llc --filetype=obj %s -o - | llvm-readelf --unwind - | FileCheck %s --check-prefix=UNWIND +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-none-eabi" + +; C++ +; int g(int); +; +; int f(int n, ...) { +; __builtin_va_list ap; +; __builtin_va_start(ap, n); +; int s = 0; +; for (int i = 0; i < n; ++i) +; s += g(__builtin_va_arg(ap, int)); +; __builtin_va_end(ap); +; return s; +; } + +%"struct.std::__va_list" = type { i8* } + +define hidden i32 @_Z1fiz(i32 %n, ...) 
local_unnamed_addr #0 { +entry: + %ap = alloca %"struct.std::__va_list", align 4 + %0 = bitcast %"struct.std::__va_list"* %ap to i8* + call void @llvm.va_start(i8* nonnull %0) + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %1 = getelementptr inbounds %"struct.std::__va_list", %"struct.std::__va_list"* %ap, i32 0, i32 0 + br label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %s.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + call void @llvm.va_end(i8* nonnull %0) + ret i32 %s.0.lcssa + +for.body: ; preds = %for.body.lr.ph, %for.body + %i.09 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ] + %s.08 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ] + %argp.cur = load i8*, i8** %1, align 4 + %argp.next = getelementptr inbounds i8, i8* %argp.cur, i32 4 + store i8* %argp.next, i8** %1, align 4 + %2 = bitcast i8* %argp.cur to i32* + %3 = load i32, i32* %2, align 4 + %call = call i32 @_Z1gi(i32 %3) + %add = add nsw i32 %call, %s.08 + %inc = add nuw nsw i32 %i.09, 1 + %exitcond.not = icmp eq i32 %inc, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body +} + +; CHECK-LABEL: _Z1fiz: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .pad #12 +; CHECK-NEXT: sub sp, #12 +; CHECK-NEXT: .cfi_def_cfa_offset 12 +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 28 +; CHECK-NEXT: .cfi_offset lr, -16 +; CHECK-NEXT: .cfi_offset r7, -20 +; CHECK-NEXT: .cfi_offset r5, -24 +; CHECK-NEXT: .cfi_offset r4, -28 +; CHECK-NEXT: .save {ra_auth_code} +; CHECK-NEXT: str r12, [sp, #-4]! +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset ra_auth_code, -32 +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: .cfi_def_cfa_offset 40 +; ... +; CHECK: add r[[N:[0-9]*]], sp, #28 +; CHECK: stm r[[N]]!, {r1, r2, r3} +; ... 
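+; The 12-byte varargs area is released only after the callee-saved pops, so
+; sp has the same value at aut as it had at pac on entry.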
+; CHECK: add sp, #8 +; CHECK-NEXT: ldr r12, [sp], #4 +; CHECK-NEXT: pop.w {r4, r5, r7, lr} +; CHECK-NEXT: add sp, #12 +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + +declare void @llvm.va_start(i8*) #1 +declare void @llvm.va_end(i8*) #1 + +declare dso_local i32 @_Z1gi(i32) local_unnamed_addr + +attributes #0 = { optsize } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} + +; UNWIND-LABEL: FunctionAddress +; UNWIND: 0x01 ; vsp = vsp + 8 +; UNWIND-NEXT: 0xB4 ; pop ra_auth_code +; UNWIND-NEXT: 0x84 0x0B ; pop {r4, r5, r7, lr} +; UNWIND-NEXT: 0x02 ; vsp = vsp + 12 diff --git a/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/pacbti-m-vla.ll @@ -0,0 +1,125 @@ +; RUN: llc --force-dwarf-frame-section %s -o - | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-none-eabi" + +; int g(int, int *); +; +; int f(int n) { +; int a[n]; +; g(n, a); +; int s = 0; +; for (int i = 0; i < n; ++i) +; s += a[i]; +; return s; +; } + +define hidden i32 @f(i32 %n) local_unnamed_addr #0 { +entry: + %vla = alloca i32, i32 %n, align 4 + %call = call i32 @g(i32 %n, i32* nonnull %vla) #0 + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %0 = add i32 %n, -1 + %xtraiter = and i32 %n, 3 + %1 = icmp ult i32 %0, 3 + br i1 %1, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body.preheader.new + +for.body.preheader.new: ; preds = %for.body.preheader + %unroll_iter = and i32 %n, -4 + br label %for.body + +for.cond.cleanup.loopexit.unr-lcssa: ; preds = %for.body, %for.body.preheader + %add.lcssa.ph = phi i32 [ undef, %for.body.preheader ], [ %add.3, %for.body ] + %i.010.unr = phi i32 [ 0, %for.body.preheader ], [ %inc.3, %for.body ] + %s.09.unr = phi i32 [ 0, %for.body.preheader ], [ %add.3, %for.body ] + %lcmp.mod.not = icmp eq i32 %xtraiter, 0 + br i1 %lcmp.mod.not, label %for.cond.cleanup, label %for.body.epil + +for.body.epil: ; preds = %for.cond.cleanup.loopexit.unr-lcssa + %arrayidx.epil = getelementptr inbounds i32, i32* %vla, i32 %i.010.unr + %2 = load i32, i32* %arrayidx.epil, align 4 + %add.epil = add nsw i32 %2, %s.09.unr + %epil.iter.cmp.not = icmp eq i32 %xtraiter, 1 + br i1 %epil.iter.cmp.not, label %for.cond.cleanup, label %for.body.epil.1 + +for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit.unr-lcssa, %for.body.epil.2, %for.body.epil.1, %for.body.epil, %entry + %s.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa.ph, %for.cond.cleanup.loopexit.unr-lcssa ], [ %add.epil, %for.body.epil ], [ %add.epil.1, %for.body.epil.1 ], [ %add.epil.2, %for.body.epil.2 ] + ret i32 %s.0.lcssa + +for.body: ; preds = %for.body, %for.body.preheader.new + %i.010 = phi i32 [ 0, %for.body.preheader.new ], [ %inc.3, %for.body ] + %s.09 = phi i32 [ 0, %for.body.preheader.new ], [ %add.3, %for.body ] + %niter = phi i32 [ %unroll_iter, %for.body.preheader.new ], [ %niter.nsub.3, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %vla, i32 %i.010 + %3 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %3, %s.09 + %inc = or i32 %i.010, 1 + %arrayidx.1 = getelementptr inbounds i32, i32* %vla, i32 %inc + %4 = load i32, i32* %arrayidx.1, align 4 + %add.1 = add nsw i32 %4, %add + %inc.1 = or 
i32 %i.010, 2 + %arrayidx.2 = getelementptr inbounds i32, i32* %vla, i32 %inc.1 + %5 = load i32, i32* %arrayidx.2, align 4 + %add.2 = add nsw i32 %5, %add.1 + %inc.2 = or i32 %i.010, 3 + %arrayidx.3 = getelementptr inbounds i32, i32* %vla, i32 %inc.2 + %6 = load i32, i32* %arrayidx.3, align 4 + %add.3 = add nsw i32 %6, %add.2 + %inc.3 = add nuw nsw i32 %i.010, 4 + %niter.nsub.3 = add i32 %niter, -4 + %niter.ncmp.3 = icmp eq i32 %niter.nsub.3, 0 + br i1 %niter.ncmp.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body + +for.body.epil.1: ; preds = %for.body.epil + %inc.epil = add nuw nsw i32 %i.010.unr, 1 + %arrayidx.epil.1 = getelementptr inbounds i32, i32* %vla, i32 %inc.epil + %7 = load i32, i32* %arrayidx.epil.1, align 4 + %add.epil.1 = add nsw i32 %7, %add.epil + %epil.iter.cmp.1.not = icmp eq i32 %xtraiter, 2 + br i1 %epil.iter.cmp.1.not, label %for.cond.cleanup, label %for.body.epil.2 + +for.body.epil.2: ; preds = %for.body.epil.1 + %inc.epil.1 = add nuw nsw i32 %i.010.unr, 2 + %arrayidx.epil.2 = getelementptr inbounds i32, i32* %vla, i32 %inc.epil.1 + %8 = load i32, i32* %arrayidx.epil.2, align 4 + %add.epil.2 = add nsw i32 %8, %add.epil.1 + br label %for.cond.cleanup +} + +; CHECK-LABEL: f: +; CHECK: pac r12, lr, sp +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: .cfi_def_cfa_offset 20 +; CHECK-NEXT: .cfi_offset lr, -4 +; CHECK-NEXT: .cfi_offset r7, -8 +; CHECK-NEXT: .cfi_offset r6, -12 +; CHECK-NEXT: .cfi_offset r5, -16 +; CHECK-NEXT: .cfi_offset r4, -20 +; CHECK-NEXT: .setfp r7, sp, #12 +; CHECK-NEXT: add r7, sp, #12 +; CHECK-NEXT: .cfi_def_cfa r7, 8 +; CHECK-NEXT: .save {r8, r9, ra_auth_code} +; CHECK-NEXT: push.w {r8, r9, r12} +; CHECK-NEXT: .cfi_offset ra_auth_code, -24 +; CHECK-NEXT: .cfi_offset r9, -28 +; CHECK-NEXT: .cfi_offset r8, -32 +; ... 
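+; Because of the VLA, the epilogue recovers sp from the frame pointer r7
+; before popping the callee-saved registers, so aut again executes with sp
+; back at its entry value.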
+; CHECK: sub.w r[[N:[0-9]*]], r7, #24 +; CHECK-NEXT: mov sp, r[[N]] +; CHECK-NEXT: pop.w {r8, r9, r12} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} +; CHECK-NEXT: aut r12, lr, sp +; CHECK-NEXT: bx lr + +declare dso_local i32 @g(i32, i32*) local_unnamed_addr #0 + +attributes #0 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"branch-target-enforcement", i32 0} +!1 = !{i32 1, !"sign-return-address", i32 1} +!2 = !{i32 1, !"sign-return-address-all", i32 0} diff --git a/llvm/test/MC/ARM/ra-auth-code-errors.s b/llvm/test/MC/ARM/ra-auth-code-errors.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/ARM/ra-auth-code-errors.s @@ -0,0 +1,36 @@ +// RUN: not llvm-mc -triple=thumbv7 %s -o - 2>&1 | FileCheck %s --strict-whitespace + .text + .syntax unified + .code 16 + .thumb_func + .global f +f: + .fnstart + .save {r11-ra_auth_code} +// CHECK: [[# @LINE - 1]]:16: error: pseudo-register not allowed +// CHECK-NEXT: .save {r11-ra_auth_code} +// CHECK-NEXT: ^ + .save {r11, ra_auth_code, r12} +// CHECK: [[# @LINE - 1]]:31: warning: duplicated register (r12) in register list +// CHECK-NEXT: .save {r11, ra_auth_code, r12} +// CHECK-NEXT: ^ + .save {ra_auth_code-r13} +// CHECK: [[# @LINE - 1]]:12: error: pseudo-register not allowed +// CHECK-NEXT: .save {ra_auth_code-r13} +// CHECK-NEXT: ^ + push {ra_auth_code} +// CHECK: [[# @LINE - 1]]:11: error: pseudo-register not allowed +// CHECK-NEXT: push {ra_auth_code} +// CHECK-NEXT: ^ + push {r11, ra_auth_code} +// CHECK: [[# @LINE - 1]]:16: error: pseudo-register not allowed +// CHECK-NEXT: push {r11, ra_auth_code} +// CHECK-NEXT: ^ + push {ra_auth_code, r12} +// CHECK: [[# @LINE - 1]]:11: error: pseudo-register not allowed +// CHECK-NEXT: push {ra_auth_code, r12} +// CHECK-NEXT: ^ + push {ra_auth_code, r13} +// CHECK: [[# @LINE - 1]]:11: error: pseudo-register not allowed +// CHECK-NEXT: push {ra_auth_code, r13} +// CHECK-NEXT: ^ diff --git a/llvm/test/MC/ARM/ra-auth-code.s b/llvm/test/MC/ARM/ra-auth-code.s new file mode 100644 --- /dev/null +++ b/llvm/test/MC/ARM/ra-auth-code.s @@ -0,0 +1,24 @@ +// RUN: llvm-mc -triple=thumbv7 -filetype=obj %s -o - | llvm-readelf -u - | FileCheck %s + + .syntax unified + .code 16 + .thumb_func + .global f +f: + .fnstart + .save {ra_auth_code} + .save {ra_auth_code, r13} + .save {r11, ra_auth_code, r13} + .save {r11, ra_auth_code} + .fnend +// CHECK-LABEL: Opcodes [ +// CHECK-NEXT: 0x80 0x80 ; pop {fp} +// CHECK-NEXT: 0xB4 ; pop ra_auth_code +// CHECK-NEXT: 0x80 0x80 ; pop {fp} +// CHECK-NEXT: 0xB4 ; pop ra_auth_code +// CHECK-NEXT: 0x82 0x00 ; pop {sp} +// CHECK-NEXT: 0xB4 ; pop ra_auth_code +// CHECK-NEXT: 0x82 0x00 ; pop {sp} +// CHECK-NEXT: 0xB4 ; pop ra_auth_code +// CHECK-NEXT: 0xB0 ; finish +// CHECK-NEXT: 0xB0 ; finish diff --git a/llvm/test/MC/Disassembler/ARM/sub-sp-imm-thumb2.txt b/llvm/test/MC/Disassembler/ARM/sub-sp-imm-thumb2.txt --- a/llvm/test/MC/Disassembler/ARM/sub-sp-imm-thumb2.txt +++ b/llvm/test/MC/Disassembler/ARM/sub-sp-imm-thumb2.txt @@ -6,8 +6,8 @@ # CHECK: subw sp, sp, #1148 # CHECK-SAME: -# CHECK-NEXT: +# CHECK-NEXT: +# CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: > @@ -16,8 +16,8 @@ # CHECK: sub.w sp, sp, #1024 # CHECK-SAME: -# CHECK-NEXT: +# CHECK-NEXT: +# CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: @@ -27,8 +27,8 @@ # CHECK: subs.w sp, sp, #1024 # CHECK-SAME: -# CHECK-NEXT: +# CHECK-NEXT: +# CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: # CHECK-NEXT: diff --git a/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s b/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s --- 
a/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s +++ b/llvm/test/tools/llvm-readobj/ELF/ARM/unwind.s @@ -102,6 +102,7 @@ .unwind_raw 8, 0xa1 .unwind_raw 12, 0xa9 .unwind_raw 0, 0xb0 + .unwind_raw 4, 0xb4 .unwind_raw 4, 0xb1, 0x01 .unwind_raw 0xa04, 0xb2, 0x80, 0x04 .unwind_raw 24, 0xb3, 0x12 @@ -129,12 +130,10 @@ .unwind_raw 0, 0xa0 .unwind_raw 0, 0xa8 .unwind_raw 0, 0xb0 - .unwind_raw 0, 0xb1, 0x00 .unwind_raw 4, 0xb1, 0x01 .unwind_raw 0, 0xb1, 0x10 .unwind_raw 0x204, 0xb2, 0x00 .unwind_raw 16, 0xb3, 0x00 - .unwind_raw 0, 0xb4 .unwind_raw 16, 0xb8 .unwind_raw 4, 0xc0 .unwind_raw 4, 0xc6, 0x00 @@ -277,6 +276,7 @@ @ SYM: 0xB3 0x12 ; pop {d1, d2, d3} @ SYM: 0xB2 0x80 0x04 ; vsp = vsp + 2564 @ SYM: 0xB1 0x01 ; pop {r0} +@ SYM: 0xB4 ; pop ra_auth_code @ SYM: 0xB0 ; finish @ SYM: 0xA9 ; pop {r4, r5, lr} @ SYM: 0xA1 ; pop {r4, r5} @@ -306,12 +306,10 @@ @ SYM: 0xC6 0x00 ; pop {wR0} @ SYM: 0xC0 ; pop {wR10} @ SYM: 0xB8 ; pop {d8} -@ SYM: 0xB4 ; spare @ SYM: 0xB3 0x00 ; pop {d0} @ SYM: 0xB2 0x00 ; vsp = vsp + 516 @ SYM: 0xB1 0x10 ; spare @ SYM: 0xB1 0x01 ; pop {r0} -@ SYM: 0xB1 0x00 ; spare @ SYM: 0xB0 ; finish @ SYM: 0xA8 ; pop {r4, lr} @ SYM: 0xA0 ; pop {r4} diff --git a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h --- a/llvm/tools/llvm-readobj/ARMEHABIPrinter.h +++ b/llvm/tools/llvm-readobj/ARMEHABIPrinter.h @@ -158,9 +158,8 @@ uint8_t Opcode0 = Opcodes[OI++ ^ 3]; uint8_t Opcode1 = Opcodes[OI++ ^ 3]; - SW.startLine() - << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1, - ((Opcode1 & 0xf0) || Opcode1 == 0x00) ? "spare" : "pop "); + SW.startLine() << format("0x%02X 0x%02X ; %s", Opcode0, Opcode1, + (Opcode1 & 0xf0) ? "spare" : "pop "); if (((Opcode1 & 0xf0) == 0x00) && Opcode1) PrintGPR((Opcode1 & 0x0f)); OS << '\n'; @@ -195,7 +194,8 @@ inline void OpcodeDecoder::Decode_101101nn(const uint8_t *Opcodes, unsigned &OI) { uint8_t Opcode = Opcodes[OI++ ^ 3]; - SW.startLine() << format("0x%02X ; spare\n", Opcode); + SW.startLine() << format("0x%02X ; %s\n", Opcode, + (Opcode == 0xb4) ? "pop ra_auth_code" : "spare"); } inline void OpcodeDecoder::Decode_10111nnn(const uint8_t *Opcodes, unsigned &OI) {
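
For reference, here is a minimal standalone sketch of how the unwind opcode sequences that appear in the UNWIND checks above decode, including the new 0xB4 opcode handled by the Decode_101101nn change; it is illustrative only and is not the llvm-readobj implementation.

// Decode the subset of ARM EHABI unwind opcodes used by these tests,
// including 0xB4 ("pop ra_auth_code"), which used to be a spare encoding.
#include <cstdint>
#include <cstdio>
#include <vector>

static const char *GPR[12] = {"r4",  "r5", "r6", "r7", "r8", "r9",
                              "r10", "fp", "ip", "sp", "lr", "pc"};

static void decode(const std::vector<uint8_t> &Ops) {
  for (size_t I = 0; I < Ops.size();) {
    uint8_t Op = Ops[I++];
    if ((Op & 0xC0) == 0x00) {
      // 00xxxxxx: vsp = vsp + (xxxxxx << 2) + 4
      std::printf("0x%02X      ; vsp = vsp + %d\n", Op, ((Op & 0x3F) << 2) + 4);
    } else if ((Op & 0xF0) == 0x80) {
      // 1000iiii iiiiiiii: pop r4-r15 selected by the 12-bit mask
      uint8_t Op1 = Ops[I++];
      unsigned Mask = ((Op & 0x0F) << 8) | Op1;
      std::printf("0x%02X 0x%02X ; pop {", Op, Op1);
      const char *Sep = "";
      for (unsigned R = 0; R < 12; ++R)
        if (Mask & (1u << R)) {
          std::printf("%s%s", Sep, GPR[R]);
          Sep = ", ";
        }
      std::printf("}\n");
    } else if ((Op & 0xF0) == 0xA0) {
      // 1010Rnnn: pop r4-r(4+nnn), plus lr when R is set
      std::printf("0x%02X      ; pop {r4-r%d%s}\n", Op, 4 + (Op & 0x07),
                  (Op & 0x08) ? ", lr" : "");
    } else if (Op == 0xB0) {
      std::printf("0x%02X      ; finish\n", Op);
    } else if (Op == 0xB4) {
      // Newly assigned by this patch: restore the return address
      // authentication code saved by the prologue.
      std::printf("0x%02X      ; pop ra_auth_code\n", Op);
    } else {
      std::printf("0x%02X      ; spare\n", Op);
    }
  }
}

int main() {
  // The epilogue encoding checked in bti-pac-replace-2.ll:
  //   add sp, #4 / ldr r12, [sp], #4 / pop {r7, lr}
  decode({0x00, 0xB4, 0x84, 0x08, 0xB0});
  return 0;
}

Running the sketch prints the same annotations as the UNWIND checks in bti-pac-replace-2.ll: "vsp = vsp + 4", "pop ra_auth_code", "pop {r7, lr}", "finish".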