diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -361,6 +361,9 @@ /// Reinsert debug_values recorded in ScheduleDAGInstrs::DbgValues. void placeDebugValues(); + /// Reinsert cfi instructions recorded in ScheduleDAGInstrs::CFIInstructions. + void placeCFIInstructions(); + /// dump the scheduled Sequence. void dumpSchedule() const; diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h --- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -34,32 +34,34 @@ namespace llvm { - class LiveIntervals; - class MachineFrameInfo; - class MachineFunction; - class MachineInstr; - class MachineLoopInfo; - class MachineOperand; - struct MCSchedClassDesc; - class PressureDiffs; - class PseudoSourceValue; - class RegPressureTracker; - class UndefValue; - class Value; - - /// An individual mapping from virtual register number to SUnit. - struct VReg2SUnit { - unsigned VirtReg; - LaneBitmask LaneMask; - SUnit *SU; - - VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) +extern cl::opt<bool> CFIInstructionScheduling; + +class LiveIntervals; +class MachineFrameInfo; +class MachineFunction; +class MachineInstr; +class MachineLoopInfo; +class MachineOperand; +struct MCSchedClassDesc; +class PressureDiffs; +class PseudoSourceValue; +class RegPressureTracker; +class UndefValue; +class Value; + +/// An individual mapping from virtual register number to SUnit. 
+struct VReg2SUnit { + unsigned VirtReg; + LaneBitmask LaneMask; + SUnit *SU; + + VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} - unsigned getSparseSetIndex() const { - return Register::virtReg2Index(VirtReg); - } - }; + unsigned getSparseSetIndex() const { + return Register::virtReg2Index(VirtReg); + } +}; /// Mapping from virtual register to SUnit including an operand index. struct VReg2SUnitOperIdx : public VReg2SUnit { @@ -247,6 +249,13 @@ DbgValueVector DbgValues; MachineInstr *FirstDbgValue = nullptr; + /// When building the ScheduleDAG we remember which instructions precede + /// CFI_INSTRUCTION to emit them at the correct position in the final + /// schedule + using CFIInstructionVector = + std::vector<std::pair<MachineInstr *, MachineInstr *>>; + CFIInstructionVector CFIInstructions; + /// Set of live physical registers for updating kill flags. LivePhysRegs LiveRegs; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -86,6 +86,10 @@ "verify-misched", cl::Hidden, cl::desc("Verify machine instrs before and after machine scheduling")); +cl::opt<bool> CFIInstructionScheduling( + "schedule-cfiinstrs", cl::Hidden, cl::init(false), + cl::desc( + "Enable scheduling of CFI instructions during machine scheduling")); } // end namespace llvm #ifndef NDEBUG @@ -436,11 +440,18 @@ /// scheduling across calls. In PostRA scheduling, we need the isCall to enforce /// the boundary, but there would be no benefit to postRA scheduling across /// calls this late anyway. +/// +/// If the option CFIInstructionScheduling is not set, cfi instructions act as +/// scheduling boundaries, otherwise they do not. This allows scheduling cfi +/// instructions. 
static bool isSchedBoundary(MachineBasicBlock::iterator MI, MachineBasicBlock *MBB, MachineFunction *MF, const TargetInstrInfo *TII) { - return MI->isCall() || TII->isSchedulingBoundary(*MI, MBB, *MF); + return MI->isCall() || + (!MI->isCFIInstruction() && + TII->isSchedulingBoundary(*MI, MBB, *MF)) || + (MI->isCFIInstruction() && !CFIInstructionScheduling); } /// A region of an MBB for scheduling. @@ -806,6 +817,8 @@ } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); + placeCFIInstructions(); + placeDebugValues(); LLVM_DEBUG({ @@ -905,6 +918,28 @@ FirstDbgValue = nullptr; } +void ScheduleDAGMI::placeCFIInstructions() { + // If cfi instructions don't get scheduled, CFIInstructions should be empty + assert((CFIInstructionScheduling || CFIInstructions.empty()) && + "Scheduling of CFI instructions is not enabled"); + + for (CFIInstructionVector::iterator CFII = CFIInstructions.end(), + CFIE = CFIInstructions.begin(); + CFII != CFIE; --CFII) { + std::pair<MachineInstr *, MachineInstr *> Prev = *std::prev(CFII); + MachineInstr *CFIInstr = Prev.first; + MachineBasicBlock::iterator OrigPrevMI = Prev.second; + if (&*RegionBegin == CFIInstr) { + ++RegionBegin; + } + BB->splice(++OrigPrevMI, BB, CFIInstr); + if (OrigPrevMI == std::prev(RegionEnd)) { + RegionEnd = CFIInstr; + } + } + CFIInstructions.clear(); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const { for (MachineBasicBlock::iterator MI = begin(), ME = end(); MI != ME; ++MI) { @@ -1244,6 +1279,8 @@ + placeCFIInstructions(); + placeDebugValues(); LLVM_DEBUG({ dbgs() << "*** Final schedule for " << printMBBReference(*begin()->getParent()) << " ***\n"; diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -774,6 +774,8 @@ DbgValues.clear(); FirstDbgValue = nullptr; + CFIInstructions.clear(); + assert(Defs.empty() && Uses.empty() && "Only 
BuildGraph should update Defs/Uses"); Defs.setUniverse(TRI->getNumRegs()); @@ -791,6 +793,8 @@ // Walk the list of instructions, from bottom moving up. MachineInstr *DbgMI = nullptr; + MachineInstr *CFIInstr = nullptr; + for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { MachineInstr &MI = *std::prev(MII); @@ -806,6 +810,19 @@ if (MI.isDebugLabel()) continue; + // If the scheduling of CFI instructions is enabled, we remember them and + // instructions that precede them. CFI instructions are not included in the + // scheduling DAG but get inserted at the correct position later + if (CFIInstr) { + CFIInstructions.push_back(std::make_pair(CFIInstr, &MI)); + CFIInstr = nullptr; + } + + if (MI.isCFIInstruction() && CFIInstructionScheduling) { + CFIInstr = &MI; + continue; + } + SUnit *SU = MISUnitMap[&MI]; assert(SU && "No SUnit mapped to this MI"); @@ -829,6 +846,11 @@ (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) && "Cannot schedule terminators or labels!"); + // A CFI_INSTRUCTION reaches this point only if cfi scheduling is disabled + // (enabled ones are skipped above); the assert below rejects that case + assert((!MI.isCFIInstruction() || CFIInstructionScheduling) && + "Scheduling of cfi instructions is not enabled"); + // Add register-based dependencies (data, anti, and output). // For some instructions (calls, returns, inline-asm, etc.) 
there can // be explicit uses and implicit defs, in which case the use will appear diff --git a/llvm/test/CodeGen/AArch64/cfiinstrs-no-uwtable-scheduling.ll b/llvm/test/CodeGen/AArch64/cfiinstrs-no-uwtable-scheduling.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cfiinstrs-no-uwtable-scheduling.ll @@ -0,0 +1,38 @@ +; RUN: llc -O1 -mtriple aarch64-arm-gnu-linux -schedule-cfiinstrs=true --verify-machineinstrs %s -o - | FileCheck %s +; RUN: llc -O1 -mtriple aarch64-arm-gnu-linux -schedule-cfiinstrs=false --verify-machineinstrs %s -o - | FileCheck %s + +@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 + +; Function Attrs: nounwind +define dso_local i32 @a() #0 { +entry: + %call = tail call i32 bitcast (i32 (...)* @b to i32 (i32 (...)*)*)(i32 (...)* bitcast (i32 ()* @a to i32 (...)*)) #2 + br label %for.cond + +for.cond: + %call1 = tail call i32 bitcast (i32 (...)* @c to i32 (i8*, i32 (...)*)*)(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0), i32 (...)* nonnull @b) #2 + br label %for.cond +} + +declare dso_local i32 @b(...) +declare dso_local i32 @c(...) local_unnamed_addr + +attributes #0 = { nounwind } + +; CHECK: a: +; CHECK: str x30, [sp, #-32]! 
+; CHECK: adrp x0, a +; CHECK: add x0, x0, :lo12:a +; CHECK: stp x20, x19, [sp, #16] +; CHECK: bl b +; CHECK: adrp x19, .L.str +; CHECK: adrp x20, b +; CHECK: add x19, x19, :lo12:.L.str +; CHECK: add x20, x20, :lo12:b +; CHECK: .LBB0_1: +; CHECK: mov x0, x19 +; CHECK: mov x1, x20 +; CHECK: bl c +; CHECK: b .LBB0_1 +; CHECK: .Lfunc_end0: +; CHECK: .size a, .Lfunc_end0-a diff --git a/llvm/test/CodeGen/AArch64/cfiinstrs-scheduling.ll b/llvm/test/CodeGen/AArch64/cfiinstrs-scheduling.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/cfiinstrs-scheduling.ll @@ -0,0 +1,57 @@ +; RUN: llc -O1 -mtriple aarch64-arm-gnu-linux -schedule-cfiinstrs=true --verify-machineinstrs %s -o - | FileCheck --check-prefixes=CHECK,CHECK-SCHEDULING %s +; RUN: llc -O1 -mtriple aarch64-arm-gnu-linux -schedule-cfiinstrs=false --verify-machineinstrs %s -o - | FileCheck --check-prefixes=CHECK,CHECK-NO-SCHEDULING %s + +@.str = private unnamed_addr constant [1 x i8] zeroinitializer, align 1 + +; Function Attrs: uwtable +define dso_local i32 @a() #0 { +entry: + %call = tail call i32 bitcast (i32 (...)* @b to i32 (i32 (...)*)*)(i32 (...)* bitcast (i32 ()* @a to i32 (...)*)) #2 + br label %for.cond + +for.cond: ; preds = %for.cond, %entry + %call1 = tail call i32 bitcast (i32 (...)* @c to i32 (i8*, i32 (...)*)*)(i8* getelementptr inbounds ([1 x i8], [1 x i8]* @.str, i64 0, i64 0), i32 (...)* nonnull @b) #2 + br label %for.cond +} + +declare dso_local i32 @b(...) +declare dso_local i32 @c(...) local_unnamed_addr + +; CHECK: a: +; CHECK: .cfi_startproc +; +; CHECK-SCHEDULING: str x30, [sp, #-32]! 
+; CHECK-SCHEDULING: adrp x0, a +; CHECK-SCHEDULING: add x0, x0, :lo12:a +; CHECK-SCHEDULING: stp x20, x19, [sp, #16] +; CHECK-SCHEDULING: .cfi_def_cfa_offset 32 +; CHECK-SCHEDULING: .cfi_offset w19, -8 +; CHECK-SCHEDULING: .cfi_offset w20, -16 +; CHECK-SCHEDULING: .cfi_offset w30, -32 +; CHECK-SCHEDULING: bl b +; CHECK-SCHEDULING: adrp x19, .L.str +; CHECK-SCHEDULING: adrp x20, b +; CHECK-SCHEDULING: add x19, x19, :lo12:.L.str +; CHECK-SCHEDULING: add x20, x20, :lo12:b +; +; CHECK-NO-SCHEDULING: str x30, [sp, #-32]! +; CHECK-NO-SCHEDULING: stp x20, x19, [sp, #16] +; CHECK-NO-SCHEDULING: .cfi_def_cfa_offset 32 +; CHECK-NO-SCHEDULING: .cfi_offset w19, -8 +; CHECK-NO-SCHEDULING: .cfi_offset w20, -16 +; CHECK-NO-SCHEDULING: .cfi_offset w30, -32 +; CHECK-NO-SCHEDULING: adrp x0, a +; CHECK-NO-SCHEDULING: add x0, x0, :lo12:a +; CHECK-NO-SCHEDULING: bl b +; CHECK-NO-SCHEDULING: adrp x19, .L.str +; CHECK-NO-SCHEDULING: adrp x20, b +; CHECK-NO-SCHEDULING: add x19, x19, :lo12:.L.str +; CHECK-NO-SCHEDULING: add x20, x20, :lo12:b +; +; CHECK: .LBB0_1: +; CHECK: mov x0, x19 +; CHECK: mov x1, x20 +; CHECK: bl c +; CHECK: b .LBB0_1 +; CHECK: .Lfunc_end0: +; CHECK: .size a, .Lfunc_end0-a