Index: llvm/include/llvm/CodeGen/CFIFixup.h =================================================================== --- /dev/null +++ llvm/include/llvm/CodeGen/CFIFixup.h @@ -0,0 +1,38 @@ +//===------ CFIFixup.h - Insert CFI remember/restore instructions ---------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Contains definition of the base CFIFixup pass. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_CFIFIXUP_H +#define LLVM_CODEGEN_CFIFIXUP_H + +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/InitializePasses.h" + +namespace llvm { +class CFIFixup : public MachineFunctionPass { +public: + CFIFixup(char &ID) : MachineFunctionPass(ID) {} + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +protected: + virtual bool skipFunction(MachineFunction &MF) const; + virtual void resetToInitialState(MachineBasicBlock &MBB) const = 0; +}; +} // namespace llvm + +#endif // LLVM_CODEGEN_CFIFIXUP_H Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp =================================================================== --- llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -277,6 +277,12 @@ case MCCFIInstruction::OpUndefined: OutStreamer->emitCFIUndefined(Inst.getRegister()); break; + case MCCFIInstruction::OpRememberState: + OutStreamer->emitCFIRememberState(); + break; + case MCCFIInstruction::OpRestoreState: + OutStreamer->emitCFIRestoreState(); + break; } } Index: llvm/lib/CodeGen/CFIFixup.cpp 
=================================================================== --- /dev/null +++ llvm/lib/CodeGen/CFIFixup.cpp @@ -0,0 +1,182 @@ +//===------ CFIFixup.cpp - Insert CFI remember/restore instructions -------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass inserts the necessary .cfi_remember_state and +// .cfi_restore_state CFI instructions to adjust for the inconsistency of +// the call-frame information caused by final machine basic block layout. +// This pass relies on constraints LLVM imposes on the placement of +// save/restore points (cf. ShrinkWrap): +// * there is a single basic block, containing the function prologue +// * possibly multiple epilogue blocks, where each epilogue block is +// complete and self-contained, i.e. CSR restore instructions (and the +// corresponding CFI instructions) are not split across two or more blocks. +// * prologue and epilogue blocks are outside of any loops +// Thus, during execution, at the beginning and at the end of each basic block +// the function can be in one of two states: +// - "has a call frame", if the function has executed the prologue, and +// has not executed any epilogue +// - "does not have a call frame", if the function has not executed the +// prologue, or has executed an epilogue +// which can be computed by a single RPO traversal. +// From the point of view of the unwind tables, the "has/does not have +// call frame" state at the beginning of each block is determined by the state +// at the end of the previous block, in layout order. +// Where these states differ, we insert compensating CFI instructions, which +// come in two flavours: +// - CFI instructions, which reset the unwind table state to the initial one.
+// This is done by a target specific hook and is expected to be trivial +// to implement, for example it could be: +// .cfi_def_cfa <sp>, 0 +// .cfi_same_value <rN> +// .cfi_same_value <rN-1> +// ... +// where <rN> are the callee-saved registers. +// - CFI instructions, which reset the unwind table state to the one +// created by the function prologue. These are +// .cfi_restore_state +// .cfi_remember_state +// In this case we also insert a `.cfi_remember_state` after the last CFI +// instruction in the function prologue. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CFIFixup.h" + +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SmallBitVector.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/TargetInstrInfo.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +bool CFIFixup::skipFunction(MachineFunction &MF) const { + return !MF.needsFrameMoves() || + MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || + MF.getFunction().getUWTableKind() != UWTableKind::Async; +} + +static bool containsPrologue(const MachineBasicBlock &MBB) { + return llvm::any_of(MBB.instrs(), [](const auto &MI) { + return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION && + MI.getFlag(MachineInstr::FrameSetup); + }); +} + +static bool containsEpilogue(const MachineBasicBlock &MBB) { + return llvm::any_of(make_range(MBB.rbegin(), MBB.rend()), [](const auto &MI) { + return MI.getOpcode() == TargetOpcode::CFI_INSTRUCTION && + MI.getFlag(MachineInstr::FrameDestroy); + }); +} + +bool CFIFixup::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF)) + return false; + unsigned NumBlocks = MF.getNumBlockIDs(); + if (NumBlocks < 2) + return false; + + struct BlockFlags { + bool HasFrameOnEntry : 1; + bool HasFrameOnExit : 1; + }; + SmallVector<BlockFlags, 32> BlockInfo(NumBlocks, {false, false}); + + // Compute the
presence/absence of frame at each basic block. + MachineBasicBlock *PrologueBlock = nullptr; + ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin()); + for (auto *MBB : RPOT) { + auto &Info = BlockInfo[MBB->getNumber()]; + + bool HasPrologue = false; + bool HasEpilogue = false; + + if (!PrologueBlock && !Info.HasFrameOnEntry && containsPrologue(*MBB)) { + PrologueBlock = MBB; + HasPrologue = true; + } + + if (Info.HasFrameOnEntry || HasPrologue) + HasEpilogue = containsEpilogue(*MBB); + + // If the function does not have a call frame at the entry of a block and + // the block contains the prologue, then the function has a call frame at + // the exit of the block, unless the block also contained the epilogue. + Info.HasFrameOnExit = (Info.HasFrameOnEntry || HasPrologue) && !HasEpilogue; + + // Set the successors' state on entry. + for (auto *Succ : MBB->successors()) + BlockInfo[Succ->getNumber()].HasFrameOnEntry = Info.HasFrameOnExit; + } + + if (!PrologueBlock) + return false; + + // Walk the blocks of the function in "physical" order. After each block, the + // current frame state (as recorded in the unwind tables) is the same as + // at the end of that block. + // At each block, where the current frame state differs from what it + // should be at the beginning of that block, insert compensating CFI + // instructions. + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + bool Change = false; + bool HasFrame = false; + bool NeedRememberState = false; + for (MachineBasicBlock &MBB : MF) { + auto &Info = BlockInfo[MBB.getNumber()]; +#ifndef NDEBUG + for (auto *Pred : MBB.predecessors()) + assert(Info.HasFrameOnEntry == + BlockInfo[Pred->getNumber()].HasFrameOnExit && + "Inconsistent call frame state"); +#endif + if (Info.HasFrameOnEntry && !HasFrame) { + // Reset to the "after prologue" state.
+ unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr)); + BuildMI(MBB, MBB.begin(), DebugLoc(), + TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + CFIIndex = MF.addFrameInst(MCCFIInstruction::createRestoreState(nullptr)); + BuildMI(MBB, MBB.begin(), DebugLoc(), + TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + NeedRememberState = true; + Change = true; + } else if (!Info.HasFrameOnEntry && HasFrame) { + // Reset to the state upon function entry. + resetToInitialState(MBB); + Change = true; + } + + HasFrame = Info.HasFrameOnExit; + } + + // If we had to insert remember/restore state anywhere in the + // function, insert `.cfi_remember_state` immediately after the last CFI + // instruction in the prologue block. + if (NeedRememberState) { + MachineBasicBlock::iterator InsertPt = PrologueBlock->end(); + for (MachineBasicBlock::iterator I = PrologueBlock->begin(), + E = PrologueBlock->end(); + I != E; ++I) + if (I->getOpcode() == TargetOpcode::CFI_INSTRUCTION) + InsertPt = std::next(I); + + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createRememberState(nullptr)); + BuildMI(*PrologueBlock, InsertPt, DebugLoc(), + TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex); + } + return Change; +} Index: llvm/lib/CodeGen/CMakeLists.txt =================================================================== --- llvm/lib/CodeGen/CMakeLists.txt +++ llvm/lib/CodeGen/CMakeLists.txt @@ -12,6 +12,7 @@ CallingConvLower.cpp CFGuardLongjmp.cpp CFIInstrInserter.cpp + CFIFixup.cpp CodeGen.cpp CodeGenCommonISel.cpp CodeGenPassBuilder.cpp Index: llvm/lib/CodeGen/TailDuplicator.cpp =================================================================== --- llvm/lib/CodeGen/TailDuplicator.cpp +++ llvm/lib/CodeGen/TailDuplicator.cpp @@ -386,8 +386,9 @@ // Allow duplication of CFI instructions. 
if (MI->isCFIInstruction()) { BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()), - TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex( - MI->getOperand(0).getCFIIndex()); + TII->get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(MI->getOperand(0).getCFIIndex()) + .setMIFlags(MI->getFlags()); return; } MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI); Index: llvm/lib/Target/AArch64/AArch64.h =================================================================== --- llvm/lib/Target/AArch64/AArch64.h +++ llvm/lib/Target/AArch64/AArch64.h @@ -72,6 +72,7 @@ void initializeAArch64A57FPLoadBalancingPass(PassRegistry&); void initializeAArch64AdvSIMDScalarPass(PassRegistry&); void initializeAArch64BranchTargetsPass(PassRegistry&); +void initializeAArch64CFIFixupPass(PassRegistry&); void initializeAArch64CollectLOHPass(PassRegistry&); void initializeAArch64CondBrTuningPass(PassRegistry &); void initializeAArch64CompressJumpTablesPass(PassRegistry&); Index: llvm/lib/Target/AArch64/AArch64CFIFixup.h =================================================================== --- /dev/null +++ llvm/lib/Target/AArch64/AArch64CFIFixup.h @@ -0,0 +1,37 @@ +//===------ AArch64CFIFixup.h - Insert CFI remember/restore instructions --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// AArch64 specific implementation for the CFIFixup pass. 
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64CFIFIXUP_H +#define LLVM_LIB_TARGET_AARCH64_AARCH64CFIFIXUP_H + +#include "llvm/CodeGen/CFIFixup.h" +#include "AArch64.h" + +namespace llvm { + +class AArch64CFIFixup : public CFIFixup { +public: + static char ID; + + AArch64CFIFixup() : CFIFixup(ID) { + initializeAArch64CFIFixupPass(*PassRegistry::getPassRegistry()); + } + +protected: + bool skipFunction(MachineFunction &MF) const override; + void resetToInitialState(MachineBasicBlock &MBB) const override; +}; + +} // namespace llvm + +#endif // LLVM_LIB_TARGET_AARCH64_AARCH64CFIFIXUP_H \ No newline at end of file Index: llvm/lib/Target/AArch64/AArch64CFIFixup.cpp =================================================================== --- /dev/null +++ llvm/lib/Target/AArch64/AArch64CFIFixup.cpp @@ -0,0 +1,81 @@ +//===- AArch64CFIFixup - AArch64 CFI remember/restore insertion pass ------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// Contains AArch64 specific parts of the base CFIFixup pass.
+/// +//===----------------------------------------------------------------------===// +#include "AArch64CFIFixup.h" +#include "AArch64MachineFunctionInfo.h" +#include "AArch64Subtarget.h" +#include "llvm/MC/MCDwarf.h" + +using namespace llvm; + +#define DEBUG_TYPE "aarch64-cfi-fixup" + +char AArch64CFIFixup::ID = 0; + +INITIALIZE_PASS(AArch64CFIFixup, "aarch64-cfi-fixup", + "Insert CFI remember/restore state instructions", false, false) + +bool AArch64CFIFixup::skipFunction(MachineFunction &MF) const { + return !MF.getInfo<AArch64FunctionInfo>()->needsAsyncUnwindInfo() || + CFIFixup::skipFunction(MF); +} + +static void insertCFISameValue(const MCInstrDesc &Desc, MachineFunction &MF, + MachineBasicBlock &MBB, + MachineBasicBlock::iterator InsertPt, + unsigned DwarfReg) { + unsigned CFIIndex = + MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, DwarfReg)); + BuildMI(MBB, InsertPt, DebugLoc(), Desc).addCFIIndex(CFIIndex); +} + +void AArch64CFIFixup::resetToInitialState(MachineBasicBlock &MBB) const { + MachineFunction &MF = *MBB.getParent(); + const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>(); + const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); + const auto &TRI = + static_cast<const AArch64RegisterInfo &>(*Subtarget.getRegisterInfo()); + const auto &MFI = *MF.getInfo<AArch64FunctionInfo>(); + + const MCInstrDesc &CFIDesc = TII.get(TargetOpcode::CFI_INSTRUCTION); + DebugLoc DL; + + // Reset the CFA to `SP + 0`. + MachineBasicBlock::iterator InsertPt = MBB.begin(); + unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( + nullptr, TRI.getDwarfRegNum(AArch64::SP, true), 0)); + BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex); + + // Flip the RA sign state. + if (MFI.shouldSignReturnAddress()) { + CFIIndex = MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr)); + BuildMI(MBB, InsertPt, DL, CFIDesc).addCFIIndex(CFIIndex); + } + + // X18 could have been used for the shadow call stack, reset it.
+ if (Subtarget.isXRegisterReserved(18)) + insertCFISameValue(CFIDesc, MF, MBB, InsertPt, + TRI.getDwarfRegNum(AArch64::X18, true)); + + // Emit .cfi_same_value for callee-saved registers. + const std::vector<CalleeSavedInfo> &CSI = + MF.getFrameInfo().getCalleeSavedInfo(); + if (CSI.empty()) + return; + for (const auto &Info : CSI) { + unsigned Reg = Info.getReg(); + if (!TRI.regNeedsCFI(Reg, Reg)) + continue; + insertCFISameValue(CFIDesc, MF, MBB, InsertPt, + TRI.getDwarfRegNum(Reg, true)); + } +} \ No newline at end of file Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -11,6 +11,7 @@ #include "AArch64TargetMachine.h" #include "AArch64.h" +#include "AArch64CFIFixup.h" #include "AArch64MachineFunctionInfo.h" #include "AArch64MacroFusion.h" #include "AArch64Subtarget.h" @@ -31,6 +32,7 @@ #include "llvm/CodeGen/MIRParser/MIParser.h" #include "llvm/CodeGen/MachineScheduler.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/CFIFixup.h" #include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" @@ -199,6 +201,7 @@ initializeAArch64A57FPLoadBalancingPass(*PR); initializeAArch64AdvSIMDScalarPass(*PR); initializeAArch64BranchTargetsPass(*PR); + initializeAArch64CFIFixupPass(*PR); initializeAArch64CollectLOHPass(*PR); initializeAArch64CompressJumpTablesPass(*PR); initializeAArch64ConditionalComparesPass(*PR); @@ -795,6 +798,10 @@ // SVE bundles move prefixes with destructive operations. BLR_RVMARKER pseudo // instructions are lowered to bundles as well.
addPass(createUnpackMachineBundles(nullptr)); + + if (!TM->getMCAsmInfo()->usesWindowsCFI() && + !TM->getTargetTriple().isOSBinFormatMachO()) + addPass(new AArch64CFIFixup()); } yaml::MachineFunctionInfo * Index: llvm/lib/Target/AArch64/CMakeLists.txt =================================================================== --- llvm/lib/Target/AArch64/CMakeLists.txt +++ llvm/lib/Target/AArch64/CMakeLists.txt @@ -45,6 +45,7 @@ AArch64AsmPrinter.cpp AArch64BranchTargets.cpp AArch64CallingConvention.cpp + AArch64CFIFixup.cpp AArch64CleanupLocalDynamicTLSPass.cpp AArch64CollectLOH.cpp AArch64CondBrTuning.cpp Index: llvm/test/CodeGen/AArch64/O0-pipeline.ll =================================================================== --- llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -66,6 +66,7 @@ ; CHECK-NEXT: StackMap Liveness Analysis ; CHECK-NEXT: Live DEBUG_VALUE analysis ; CHECK-NEXT: Unpack machine instruction bundles +; CHECK-NEXT: Insert CFI remember/restore state instructions ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: AArch64 Assembly Printer Index: llvm/test/CodeGen/AArch64/O3-pipeline.ll =================================================================== --- llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -204,6 +204,7 @@ ; CHECK-NEXT: Machine Outliner ; CHECK-NEXT: FunctionPass Manager ; CHECK-NEXT: Unpack machine instruction bundles +; CHECK-NEXT: Insert CFI remember/restore state instructions ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter ; CHECK-NEXT: AArch64 Assembly Printer Index: llvm/test/CodeGen/AArch64/arm64-fp128.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-fp128.ll +++ llvm/test/CodeGen/AArch64/arm64-fp128.ll @@ -277,12 +277,13 @@ } -define dso_local i32 @test_br_cc() { +define dso_local i32 
@test_br_cc() uwtable { ; CHECK-LABEL: test_br_cc: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: adrp x8, lhs ; CHECK-NEXT: ldr q0, [x8, :lo12:lhs] ; CHECK-NEXT: adrp x8, rhs @@ -293,10 +294,16 @@ ; CHECK-NEXT: // %bb.1: // %iftrue ; CHECK-NEXT: mov w0, #42 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB11_2: // %iffalse +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: mov w0, #29 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret %lhs = load fp128, fp128* @lhs, align 16 Index: llvm/test/CodeGen/AArch64/call-rv-marker.ll =================================================================== --- llvm/test/CodeGen/AArch64/call-rv-marker.ll +++ llvm/test/CodeGen/AArch64/call-rv-marker.ll @@ -54,12 +54,18 @@ ret void } -define dso_local void @rv_marker_3() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +define dso_local void @rv_marker_3() uwtable personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: rv_marker_3 ; CHECK: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: bl foo1 ; SELDAG-NEXT: mov x29, x29 ; +; CHECK: .cfi_restore w30 +; CHECK-NEXT: b objc_release +; CHECK-NEXT:.LBB2_2: // %lpad +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state entry: %call = call i8* @foo1() [ "clang.arc.attachedcall"() ] invoke void @objc_object(i8* %call) #5 Index: llvm/test/CodeGen/AArch64/cfi-fixup.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/cfi-fixup.ll @@ -0,0 +1,153 @@ +; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64-linux %s -o - | FileCheck %s + +define i32 @f0(i32 %x) #0 { +; CHECK-LABEL: f0: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: cbz w0, .LBB0_4 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: cmp w0, #2 +; CHECK-NEXT: b.eq .LBB0_5 +; CHECK-NEXT: // %bb.2: // %entry +; CHECK-NEXT: cmp w0, #1 +; CHECK-NEXT: b.ne .LBB0_6 +; CHECK-NEXT: // %bb.3: // %if.then2 +; CHECK-NEXT: bl g1 +; CHECK-NEXT: add w0, w0, #1 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_5: // %if.then5 +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: bl g0 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: sub w0, w8, w0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_6: // %if.end7 +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: ret +entry: + switch i32 %x, label %if.end7 [ + i32 0, label %return + i32 1, label %if.then2 + i32 2, label %if.then5 + ] + +if.then2: + %call = tail call i32 @g1(i32 1) + %add = add nsw i32 %call, 1 + br label %return + +if.then5: + %call6 = tail call i32 @g0(i32 2) + %sub = sub nsw i32 1, %call6 + br label %return + +if.end7: + br label 
%return + +return: + %retval.0 = phi i32 [ %add, %if.then2 ], [ %sub, %if.then5 ], [ 0, %if.end7 ], [ 1, %entry ] + ret i32 %retval.0 +} + +declare i32 @g1(i32) + +declare i32 @g0(i32) + +define i32 @f1(i32 %x) #0 { +; CHECK-LABEL: f1: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cbz w0, .LBB1_2 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: bl g0 +; CHECK-NEXT: add w0, w0, #1 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: .LBB1_2: // %return +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %return, label %if.end + +if.end: + %call = tail call i32 @g0(i32 %x) + %add = add nsw i32 %call, 1 + br label %return + +return: + %retval.0 = phi i32 [ %add, %if.end ], [ 0, %entry ] + ret i32 %retval.0 +} + +define i32 @f2(i32 %x) #0 { +; CHECK-LABEL: f2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: cbz w0, .LBB2_2 +; CHECK-NEXT: // %bb.1: // %if.end +; CHECK-NEXT: bl g1 +; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: sub w0, w8, w0 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB2_2: // %if.then +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: bl g0 +; CHECK-NEXT: add w0, w0, #1 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: ret +entry: + %cmp = icmp eq i32 %x, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: + %call = tail call i32 @g0(i32 0) + %add = add nsw i32 %call, 1 + br label %return + +if.end: + %call1 = tail call i32 @g1(i32 %x) + %sub = sub nsw i32 1, %call1 + br label %return + +return: + %retval.0 = phi i32 [ %add, %if.then ], [ %sub, %if.end ] + ret i32 %retval.0 +} + +attributes #0 = { uwtable } Index: llvm/test/CodeGen/AArch64/cfi-fixup.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/cfi-fixup.mir @@ -0,0 +1,343 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=aarch64 -run-pass=aarch64-cfi-fixup %s -o - | FileCheck %s +--- | + define i32 @f0(i32 %x) #0 { + entry: br label %return + if.end: br label %return + if.then2: br label %return + if.else: br label %return + return: + ret i32 0 + } + + define i32 @f1(i32 %x) #0 { + entry: br label %return + if.end: br label %return + if.then2: br label %return + if.else: br label %return + return: + ret i32 0 + } + + declare i32 @g(i32) + + attributes #0 = { nounwind shadowcallstack uwtable "sign-return-address"="non-leaf" "target-features"="+reserve-x18" } + +... 
+--- +name: f0 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +failsVerification: false +registers: [] +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: 0 + maxAlignment: 16 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: f0 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.1(0x50000000) + ; CHECK-NEXT: liveins: $w0, $lr, $x18 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBZW renamable $w0, %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.end: + ; CHECK-NEXT: successors: %bb.3(0x30000000), %bb.2(0x50000000) + ; CHECK-NEXT: liveins: $w0, $lr, $x18 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x18 = frame-setup STRXpost $lr, $x18, 8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x16, 0x12, 0x02, 0x82, 0x78 + ; CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup 
CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: CFI_INSTRUCTION remember_state + ; CHECK-NEXT: TBNZW renamable $w0, 31, %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.else: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w0 = nuw nsw ADDWri killed renamable $w0, 1, 0 + ; CHECK-NEXT: BL @g, csr_aarch64_aapcs_scs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: renamable $w8 = MOVZWi 1, 0 + ; CHECK-NEXT: $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0 + ; CHECK-NEXT: B %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.then2: + ; CHECK-NEXT: successors: %bb.5(0x80000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0 + ; CHECK-NEXT: BL @g, csr_aarch64_aapcs_scs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0 + ; CHECK-NEXT: B %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.return: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CFI_INSTRUCTION def_cfa $wsp, 0 + ; CHECK-NEXT: CFI_INSTRUCTION negate_ra_sign_state + ; CHECK-NEXT: CFI_INSTRUCTION same_value $w18 + ; CHECK-NEXT: CFI_INSTRUCTION same_value $w30 + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.return: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CFI_INSTRUCTION restore_state + ; CHECK-NEXT: CFI_INSTRUCTION remember_state + ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + ; CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION negate_ra_sign_state + ; CHECK-NEXT: early-clobber $x18, $lr 
= frame-destroy LDRXpre $x18, -8 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w18 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + bb.0.entry: + successors: %bb.4(0x30000000), %bb.1(0x50000000) + liveins: $w0, $lr, $x18 + + CBZW renamable $w0, %bb.4 + + bb.1.if.end: + successors: %bb.3(0x30000000), %bb.2(0x50000000) + liveins: $w0, $lr, $x18 + + early-clobber $x18 = frame-setup STRXpost $lr, $x18, 8 + frame-setup CFI_INSTRUCTION escape 0x16, 0x12, 0x02, 0x82, 0x78 + frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp + frame-setup CFI_INSTRUCTION negate_ra_sign_state + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup CFI_INSTRUCTION offset $w30, -16 + TBNZW renamable $w0, 31, %bb.3 + + bb.2.if.else: + successors: %bb.5(0x80000000) + liveins: $w0 + + renamable $w0 = nuw nsw ADDWri killed renamable $w0, 1, 0 + BL @g, csr_aarch64_aapcs_scs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + renamable $w8 = MOVZWi 1, 0 + $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0 + B %bb.5 + + bb.3.if.then2: + successors: %bb.5(0x80000000) + liveins: $w0 + + renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0 + BL @g, csr_aarch64_aapcs_scs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0 + B %bb.5 + + bb.4.return: + liveins: $w0 + RET undef $lr, implicit killed $w0 + + bb.5.return: + liveins: $w0 + + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp + frame-destroy CFI_INSTRUCTION negate_ra_sign_state + early-clobber $x18, $lr = frame-destroy LDRXpre $x18, -8 + 
frame-destroy CFI_INSTRUCTION restore $w18 + frame-destroy CFI_INSTRUCTION restore $w30 + RET undef $lr, implicit killed $w0 + +... +--- +name: f1 +alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +failsVerification: false +registers: [] +liveins: + - { reg: '$w0', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: 0 + maxAlignment: 16 + adjustsStack: true + hasCalls: true + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -16, size: 8, alignment: 16, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: + hasRedZone: false +body: | + ; CHECK-LABEL: name: f1 + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.5(0x30000000), %bb.1(0x50000000) + ; CHECK-NEXT: liveins: $w0, $lr, $x18 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: CBZW renamable $w0, %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.if.end: + ; CHECK-NEXT: successors: %bb.3(0x30000000), %bb.2(0x50000000) + ; CHECK-NEXT: liveins: $w0, $lr, $x18 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x18 = frame-setup STRXpost $lr, $x18, 8 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x16, 0x12, 0x02, 0x82, 0x78 + ; CHECK-NEXT: frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION negate_ra_sign_state + ; CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: 
(store (s64) into %stack.0) + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -16 + ; CHECK-NEXT: TBNZW renamable $w0, 31, %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2.if.else: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w0 = nuw nsw ADDWri killed renamable $w0, 1, 0 + ; CHECK-NEXT: BL @g, csr_aarch64_aapcs_scs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: renamable $w8 = MOVZWi 1, 0 + ; CHECK-NEXT: $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0 + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.if.then2: + ; CHECK-NEXT: successors: %bb.4(0x80000000) + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0 + ; CHECK-NEXT: BL @g, csr_aarch64_aapcs_scs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + ; CHECK-NEXT: renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0 + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.return: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + ; CHECK-NEXT: frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION negate_ra_sign_state + ; CHECK-NEXT: early-clobber $x18, $lr = frame-destroy LDRXpre $x18, -8 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w18 + ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.return: + ; CHECK-NEXT: liveins: $w0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: RET undef $lr, implicit killed $w0 + bb.0.entry: + 
successors: %bb.5(0x30000000), %bb.1(0x50000000) + liveins: $w0, $lr, $x18 + + CBZW renamable $w0, %bb.5 + + bb.1.if.end: + successors: %bb.3(0x30000000), %bb.2(0x50000000) + liveins: $w0, $lr, $x18 + + early-clobber $x18 = frame-setup STRXpost $lr, $x18, 8 + frame-setup CFI_INSTRUCTION escape 0x16, 0x12, 0x02, 0x82, 0x78 + frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp + frame-setup CFI_INSTRUCTION negate_ra_sign_state + early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store (s64) into %stack.0) + frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup CFI_INSTRUCTION offset $w30, -16 + TBNZW renamable $w0, 31, %bb.3 + + bb.2.if.else: + successors: %bb.4(0x80000000) + liveins: $w0 + + renamable $w0 = nuw nsw ADDWri killed renamable $w0, 1, 0 + BL @g, csr_aarch64_aapcs_scs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + renamable $w8 = MOVZWi 1, 0 + $w0 = SUBWrs killed renamable $w8, killed renamable $w0, 0 + B %bb.4 + + bb.3.if.then2: + successors: %bb.4(0x80000000) + liveins: $w0 + + renamable $w0 = nsw SUBWri killed renamable $w0, 1, 0 + BL @g, csr_aarch64_aapcs_scs, implicit-def dead $lr, implicit $sp, implicit killed $w0, implicit-def $sp, implicit-def $w0 + renamable $w0 = nsw ADDWri killed renamable $w0, 1, 0 + B %bb.4 + + bb.4.return: + liveins: $w0 + + early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.0) + frame-destroy CFI_INSTRUCTION def_cfa_offset 0 + frame-destroy AUTIASP implicit-def $lr, implicit killed $lr, implicit $sp + frame-destroy CFI_INSTRUCTION negate_ra_sign_state + early-clobber $x18, $lr = frame-destroy LDRXpre $x18, -8 + frame-destroy CFI_INSTRUCTION restore $w18 + frame-destroy CFI_INSTRUCTION restore $w30 + RET undef $lr, implicit killed $w0 + + bb.5.return: + liveins: $w0 + RET undef $lr, implicit killed $w0 + +... 
Index: llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll =================================================================== --- llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -569,6 +569,7 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: adrp x8, :got:a ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] ; CHECK-NEXT: ldr w8, [x8] @@ -604,6 +605,8 @@ ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB8_6: // %if.end +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 Index: llvm/test/CodeGen/AArch64/cond-br-tuning.ll =================================================================== --- llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -187,6 +187,7 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: and w19, w1, #0x6 ; CHECK-NEXT: mov w0, w19 ; CHECK-NEXT: bl bar @@ -198,6 +199,8 @@ ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB9_2: // %if.then +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: bl foo entry: %c = and i32 %a, 6 Index: llvm/test/CodeGen/AArch64/csr-split.ll =================================================================== --- llvm/test/CodeGen/AArch64/csr-split.ll +++ llvm/test/CodeGen/AArch64/csr-split.ll @@ -13,6 +13,7 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: adrp x8, a ; CHECK-NEXT: ldrsw x8, [x8, :lo12:a] ; CHECK-NEXT: cmp x8, x0 @@ -24,6 +25,8 @@ ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; 
CHECK-NEXT: .LBB0_2: // %if.then +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: bl callVoid ; CHECK-NEXT: mov x0, x19 @@ -88,6 +91,7 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: cbz x0, .LBB1_3 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: adrp x8, a @@ -104,6 +108,8 @@ ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: b callNonVoid ; CHECK-NEXT: .LBB1_3: // %return +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 Index: llvm/test/CodeGen/AArch64/merge-store-dependency.ll =================================================================== --- llvm/test/CodeGen/AArch64/merge-store-dependency.ll +++ llvm/test/CodeGen/AArch64/merge-store-dependency.ll @@ -14,6 +14,7 @@ ; A53-NEXT: .cfi_def_cfa_offset 16 ; A53-NEXT: .cfi_offset w19, -8 ; A53-NEXT: .cfi_offset w30, -16 +; A53-NEXT: .cfi_remember_state ; A53-NEXT: movi v0.2d, #0000000000000000 ; A53-NEXT: mov x8, x0 ; A53-NEXT: mov x19, x8 @@ -51,6 +52,8 @@ ; A53-NEXT: ret ; A53-NEXT: .LBB0_4: // %while.body.i.split ; A53-NEXT: // =>This Inner Loop Header: Depth=1 +; A53-NEXT: .cfi_restore_state +; A53-NEXT: .cfi_remember_state ; A53-NEXT: b .LBB0_4 entry: %0 = bitcast %struct1* %fde to i8* Index: llvm/test/CodeGen/AArch64/nomerge.ll =================================================================== --- llvm/test/CodeGen/AArch64/nomerge.ll +++ llvm/test/CodeGen/AArch64/nomerge.ll @@ -1,6 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64 -o - | FileCheck %s -define void @foo(i32 %i) { +define void @foo(i32 %i) uwtable { +; CHECK-LABEL: foo: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: cmp w0, #7 +; CHECK-NEXT: b.eq .LBB0_3 +; CHECK-NEXT: // %bb.1: // %entry +; CHECK-NEXT: cmp w0, #5 +; CHECK-NEXT: b.ne .LBB0_4 +; CHECK-NEXT: // %bb.2: // %if.then +; CHECK-NEXT: bl bar +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: b bar +; CHECK-NEXT: .LBB0_3: // %if.then2 +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state +; CHECK-NEXT: bl bar +; CHECK-NEXT: .LBB0_4: // %if.end3 +; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w30 +; CHECK-NEXT: b bar entry: switch i32 %i, label %if.end3 [ i32 5, label %if.then @@ -23,14 +50,3 @@ declare void @bar() attributes #0 = { nomerge } - -; CHECK-LABEL: foo: -; CHECK: // %bb.0: // %entry -; CHECK: // %bb.1: // %entry -; CHECK: // %bb.2: // %if.then -; CHECK-NEXT: bl bar -; CHECK: b bar -; CHECK: .LBB0_3: // %if.then2 -; CHECK-NEXT: bl bar -; CHECK: .LBB0_4: // %if.end3 -; CHECK: b bar Index: llvm/test/CodeGen/AArch64/optimize-cond-branch.ll =================================================================== --- llvm/test/CodeGen/AArch64/optimize-cond-branch.ll +++ llvm/test/CodeGen/AArch64/optimize-cond-branch.ll @@ -19,6 +19,7 @@ ; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: cbz wzr, .LBB0_4 ; CHECK-NEXT: // %bb.2: // %b3 ; CHECK-NEXT: ldr w8, [x8] @@ -30,6 +31,8 @@ ; CHECK-NEXT: .LBB0_3: // %common.ret.sink.split ; CHECK-NEXT: b extfunc ; CHECK-NEXT: .LBB0_4: // %b2 +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: bl extfunc ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 Index: llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll =================================================================== --- llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll +++ llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll @@ -47,6 +47,7 @@ ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: mrs x8, SP_EL0 ; CHECK-NEXT: lsl x9, x0, #2 @@ -88,6 +89,8 @@ ; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: // %entry +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: bl __stack_chk_fail ; CHECK-NOT: __stack_chk_guard entry: Index: llvm/test/CodeGen/AArch64/unwind-preserved.ll =================================================================== --- llvm/test/CodeGen/AArch64/unwind-preserved.ll +++ llvm/test/CodeGen/AArch64/unwind-preserved.ll @@ -52,6 +52,7 @@ ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: bl may_throw_sve @@ -106,6 +107,8 @@ ; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret ; 
CHECK-NEXT: .LBB0_2: // %.Lunwind +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: .Ltmp2: ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #2 @@ -202,6 +205,7 @@ ; GISEL-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; GISEL-NEXT: addvl sp, sp, #-2 ; GISEL-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xa0, 0x01, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 160 * VG +; GISEL-NEXT: .cfi_remember_state ; GISEL-NEXT: str z0, [sp] // 16-byte Folded Spill ; GISEL-NEXT: .Ltmp0: ; GISEL-NEXT: bl may_throw_sve @@ -256,6 +260,8 @@ ; GISEL-NEXT: .cfi_restore w29 ; GISEL-NEXT: ret ; GISEL-NEXT: .LBB0_2: // %.Lunwind +; GISEL-NEXT: .cfi_restore_state +; GISEL-NEXT: .cfi_remember_state ; GISEL-NEXT: .Ltmp2: ; GISEL-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; GISEL-NEXT: addvl sp, sp, #2 @@ -349,6 +355,7 @@ ; CHECK-NEXT: .cfi_offset b21, -240 ; CHECK-NEXT: .cfi_offset b22, -256 ; CHECK-NEXT: .cfi_offset b23, -272 +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: .Ltmp3: ; CHECK-NEXT: bl may_throw_neon @@ -388,6 +395,8 @@ ; CHECK-NEXT: .cfi_restore b23 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_2: // %.Lunwind +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: .Ltmp5: ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload @@ -454,6 +463,7 @@ ; GISEL-NEXT: .cfi_offset b21, -240 ; GISEL-NEXT: .cfi_offset b22, -256 ; GISEL-NEXT: .cfi_offset b23, -272 +; GISEL-NEXT: .cfi_remember_state ; GISEL-NEXT: str q0, [sp] // 16-byte Folded Spill ; GISEL-NEXT: .Ltmp3: ; GISEL-NEXT: bl may_throw_neon @@ -493,6 +503,8 @@ ; GISEL-NEXT: .cfi_restore b23 ; GISEL-NEXT: ret ; GISEL-NEXT: .LBB1_2: // %.Lunwind +; GISEL-NEXT: .cfi_restore_state +; GISEL-NEXT: .cfi_remember_state ; GISEL-NEXT: .Ltmp5: ; 
GISEL-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; GISEL-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload Index: llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll =================================================================== --- llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll +++ llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll @@ -148,6 +148,7 @@ ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: mov w19, w0 ; CHECK-NEXT: mov w20, wzr ; CHECK-NEXT: mov w21, #40000 @@ -178,6 +179,8 @@ ; CHECK-NEXT: .cfi_restore w30 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_5: // %cleanup +; CHECK-NEXT: .cfi_restore_state +; CHECK-NEXT: .cfi_remember_state ; CHECK-NEXT: .Ltmp2: ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: bl foo2