Index: llvm/lib/CodeGen/MacroFusion.cpp
===================================================================
--- llvm/lib/CodeGen/MacroFusion.cpp
+++ llvm/lib/CodeGen/MacroFusion.cpp
@@ -32,10 +32,32 @@
 static cl::opt<bool> EnableMacroFusion("misched-fusion", cl::Hidden,
   cl::desc("Enable scheduling for macro fusion."), cl::init(true));
 
+namespace {
+
 static bool isHazard(const SDep &Dep) {
   return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
 }
 
+/// Return the first predecessor of \p SU that is linked to it by a cluster
+/// edge, or nullptr when \p SU has no cluster predecessor.
+static SUnit *getPredClusterSU(const SUnit &SU) {
+  for (const SDep &SI : SU.Preds)
+    if (SI.isCluster())
+      return SI.getSUnit();
+
+  return nullptr;
+}
+
+/// Return true when the chain of cluster predecessors ending at \p SU,
+/// counting \p SU itself, holds fewer than \p FuseLimit scheduling units.
+static bool hasLessThanNumFused(const SUnit &SU, unsigned FuseLimit) {
+  unsigned Num = 1;
+  const SUnit *CurrentSU = &SU;
+  while ((CurrentSU = getPredClusterSU(*CurrentSU)) && Num < FuseLimit)
+    ++Num;
+  return Num < FuseLimit;
+}
+
 static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
                                 SUnit &SecondSU) {
   // Check that neither instr is already paired with another along the edge
@@ -110,8 +132,6 @@
   return true;
 }
 
-namespace {
-
 /// Post-process the DAG to create cluster edges between instrs that may
 /// be fused by the processor into a single operation.
 class MacroFusion : public ScheduleDAGMutation {
@@ -161,8 +181,10 @@
     if (DepSU.isBoundaryNode())
       continue;
 
+    // Only back-to-back macro fusion is supported for now.
const MachineInstr *DepMI = DepSU.getInstr(); - if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) + if (!hasLessThanNumFused(DepSU, 2) || + !shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI)) continue; if (fuseInstructionPair(DAG, DepSU, AnchorSU)) Index: llvm/test/CodeGen/AArch64/macro-fusion.ll =================================================================== --- llvm/test/CodeGen/AArch64/macro-fusion.ll +++ llvm/test/CodeGen/AArch64/macro-fusion.ll @@ -1,21 +1,18 @@ ; REQUIRES: asserts ; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+fuse-arith-logic -verify-misched -debug-only=machine-scheduler 2>&1 > /dev/null | FileCheck %s -; Verify that, the macro-fusion creates the necessary dependencies between SUs. +; Verify that macro fusion creates the necessary dependencies between SUs and +; that at most 2 SUs are fused. define signext i32 @test(i32 signext %a, i32 signext %b, i32 signext %c, i32 signext %d) { entry: ; CHECK: ********** MI Scheduling ********** ; CHECK-LABEL: %bb.0 entry ; CHECK: Macro fuse: SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]]) ; CHECK: Bind SU([[SU1:[0-9]+]]) - SU([[SU4]]) -; CHECK: Macro fuse: SU([[SU5]]) - SU([[SU6:[0-9]+]]) -; CHECK: Bind SU([[SU0:[0-9]+]]) - SU([[SU5]]) -; CHECK: SU([[SU0]]): %{{[0-9]+}}:gpr32 = COPY $w3 +; CHECK-NOT: Macro fuse: ; CHECK: SU([[SU1]]): %{{[0-9]+}}:gpr32 = COPY $w2 ; CHECK: SU([[SU4]]): %{{[0-9]+}}:gpr32 = nsw ADDWrr ; CHECK: SU([[SU5]]): %{{[0-9]+}}:gpr32 = nsw ADDWrr -; CHECK: SU([[SU6]]): %{{[0-9]+}}:gpr32 = nsw SUBWrr - %add = add nsw i32 %b, %a %add1 = add nsw i32 %add, %c %sub = sub nsw i32 %add1, %d