Index: llvm/include/llvm/CodeGen/MacroFusion.h
===================================================================
--- llvm/include/llvm/CodeGen/MacroFusion.h
+++ llvm/include/llvm/CodeGen/MacroFusion.h
@@ -26,11 +26,13 @@
 
 /// Check if the instr pair, FirstMI and SecondMI, should be fused
 /// together. Given SecondMI, when FirstMI is unspecified, then check if
-/// SecondMI may be part of a fused pair at all.
+/// SecondMI may be part of a fused pair at all. NumFused specifies the
+/// number of SUs that have already been fused in the current dependency chain.
 using ShouldSchedulePredTy = std::function<bool(const TargetInstrInfo &TII,
                                                 const TargetSubtargetInfo &TSI,
                                                 const MachineInstr *FirstMI,
-                                                const MachineInstr &SecondMI)>;
+                                                const MachineInstr &SecondMI,
+                                                unsigned NumFused)>;
 
 /// Create a DAG scheduling mutation to pair instructions back to back
 /// for instructions that benefit according to the target-specific
Index: llvm/lib/CodeGen/MacroFusion.cpp
===================================================================
--- llvm/lib/CodeGen/MacroFusion.cpp
+++ llvm/lib/CodeGen/MacroFusion.cpp
@@ -54,6 +54,13 @@
   return nullptr;
 }
 
+static unsigned getNumOfClusterSU(const SUnit &SU) {
+  unsigned Num = 0;
+  const SUnit *CurrentSU = &SU;
+  while ((CurrentSU = getPredClusterSU(*CurrentSU))) Num ++;
+  return Num;
+}
+
 static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
                                 SUnit &SecondSU) {
   // Check that neither instr is already paired with another along the edge
@@ -173,7 +180,7 @@
   const TargetSubtargetInfo &ST = DAG.MF.getSubtarget();
 
   // Check if the anchor instr may be fused.
-  if (!shouldScheduleAdjacent(TII, ST, nullptr, AnchorMI))
+  if (!shouldScheduleAdjacent(TII, ST, nullptr, AnchorMI, 0))
     return false;
 
   // Explorer for fusion candidates among the dependencies of the anchor instr.
@@ -187,7 +194,8 @@
       continue;
 
     const MachineInstr *DepMI = DepSU.getInstr();
-    if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI))
+    if (!shouldScheduleAdjacent(TII, ST, DepMI, AnchorMI,
+                                getNumOfClusterSU(DepSU)))
       continue;
 
     if (fuseInstructionPair(DAG, DepSU, AnchorSU))
Index: llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -375,7 +375,12 @@
 static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
                                    const TargetSubtargetInfo &TSI,
                                    const MachineInstr *FirstMI,
-                                   const MachineInstr &SecondMI) {
+                                   const MachineInstr &SecondMI,
+                                   unsigned NumFused) {
+  // Only back-to-back fusion is supported.
+  if (NumFused > 0)
+    return false;
+
   const AArch64Subtarget &ST = static_cast<const AArch64Subtarget &>(TSI);
 
   // All checking functions assume that the 1st instr is a wildcard if it is
Index: llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUMacroFusion.cpp
@@ -28,7 +28,12 @@
 static bool shouldScheduleAdjacent(const TargetInstrInfo &TII_,
                                    const TargetSubtargetInfo &TSI,
                                    const MachineInstr *FirstMI,
-                                   const MachineInstr &SecondMI) {
+                                   const MachineInstr &SecondMI,
+                                   unsigned NumFused) {
+  // Only back-to-back fusion is supported.
+  if (NumFused > 0)
+    return false;
+
   const SIInstrInfo &TII = static_cast<const SIInstrInfo &>(TII_);
 
   switch (SecondMI.getOpcode()) {
Index: llvm/lib/Target/ARM/ARMMacroFusion.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMMacroFusion.cpp
+++ llvm/lib/Target/ARM/ARMMacroFusion.cpp
@@ -51,7 +51,12 @@
 static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
                                    const TargetSubtargetInfo &TSI,
                                    const MachineInstr *FirstMI,
-                                   const MachineInstr &SecondMI) {
+                                   const MachineInstr &SecondMI,
+                                   unsigned NumFused) {
+  // Only back-to-back fusion is supported.
+  if (NumFused > 0)
+    return false;
+
   const ARMSubtarget &ST = static_cast<const ARMSubtarget &>(TSI);
 
   if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI))
Index: llvm/lib/Target/X86/X86MacroFusion.cpp
===================================================================
--- llvm/lib/Target/X86/X86MacroFusion.cpp
+++ llvm/lib/Target/X86/X86MacroFusion.cpp
@@ -180,7 +180,12 @@
 static bool shouldScheduleAdjacent(const TargetInstrInfo &TII,
                                    const TargetSubtargetInfo &TSI,
                                    const MachineInstr *FirstMI,
-                                   const MachineInstr &SecondMI) {
+                                   const MachineInstr &SecondMI,
+                                   unsigned NumFused) {
+  // Only back-to-back fusion is supported.
+  if (NumFused > 0)
+    return false;
+
   const X86Subtarget &ST = static_cast<const X86Subtarget &>(TSI);
 
   // Check if this processor supports any kind of fusion.
Index: llvm/test/CodeGen/AArch64/macro-fusion.ll
===================================================================
--- llvm/test/CodeGen/AArch64/macro-fusion.ll
+++ llvm/test/CodeGen/AArch64/macro-fusion.ll
@@ -8,14 +8,10 @@
 ; CHECK-LABEL: %bb.0 entry
 ; CHECK: Macro fuse: SU([[SU4:[0-9]+]]) - SU([[SU5:[0-9]+]])
 ; CHECK: Bind SU([[SU1:[0-9]+]]) - SU([[SU4]])
-; CHECK: Macro fuse: SU([[SU5]]) - SU([[SU6:[0-9]+]])
-; CHECK: Bind SU([[SU0:[0-9]+]]) - SU([[SU5]])
-; CHECK: Bind SU([[SU0:[0-9]+]]) - SU([[SU4]])
-; CHECK: SU([[SU0]]): %{{[0-9]+}}:gpr32 = COPY $w3
+; CHECK-NOT: Macro fuse:
 ; CHECK: SU([[SU1]]): %{{[0-9]+}}:gpr32 = COPY $w2
 ; CHECK: SU([[SU4]]): %{{[0-9]+}}:gpr32 = nsw ADDWrr
 ; CHECK: SU([[SU5]]): %{{[0-9]+}}:gpr32 = nsw ADDWrr
-; CHECK: SU([[SU6]]): %{{[0-9]+}}:gpr32 = nsw SUBWrr
   %add = add nsw i32 %b, %a
   %add1 = add nsw i32 %add, %c