diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -289,6 +289,10 @@
     "fuse-literals", "HasFuseLiterals", "true",
     "CPU fuses literal generation operations">;
 
+def FeatureFuseAddSub2RegAndConstOne : SubtargetFeature<
+    "fuse-addsub-2reg-const1", "HasFuseAddSub2RegAndConstOne", "true",
+    "CPU fuses (a + b + 1) and (a - b - 1)">;
+
 def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
     "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
     "Disable latency scheduling heuristic">;
@@ -1306,12 +1310,12 @@
                                  FeatureNEON, FeaturePerfMon, FeatureSPE,
                                  FeatureFullFP16, FeatureFP16FML, FeatureDotProd];
   list<SubtargetFeature> Ampere1 = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
-                                    FeatureSSBS, FeatureRandGen, FeatureSB,
-                                    FeatureSHA2, FeatureSHA3, FeatureAES];
+                                    FeatureSSBS, FeatureRandGen, FeatureSHA2,
+                                    FeatureSHA3, FeatureAES];
   list<SubtargetFeature> Ampere1A = [HasV8_6aOps, FeatureNEON, FeaturePerfMon,
                                      FeatureMTE, FeatureSSBS, FeatureRandGen,
-                                     FeatureSB, FeatureSM4, FeatureSHA2,
-                                     FeatureSHA3, FeatureAES];
+                                     FeatureSM4, FeatureSHA2, FeatureSHA3,
+                                     FeatureAES];
 
   // ETE and TRBE are future architecture extensions. We temporarily enable them
   // by default for users targeting generic AArch64. The extensions do not
diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
--- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
+++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp
@@ -379,6 +379,68 @@
   return false;
 }
 
+/// Check whether FirstMI and SecondMI form an "A + B + 1" or "A - B - 1" pair.
+///
+/// SecondMI must be an add- or subtract-immediate of exactly 1 (unshifted),
+/// and FirstMI the matching two-register add or subtract for which
+/// AArch64InstrInfo::hasShiftedReg() is false. A null FirstMI is treated as a
+/// wildcard, so only SecondMI is examined.
+static bool isAddSub2RegAndConstOnePair(const MachineInstr *FirstMI,
+                                        const MachineInstr &SecondMI) {
+  bool NeedsSubtract = false;
+
+  // The 2nd instr must be an add-immediate or subtract-immediate.
+  switch (SecondMI.getOpcode()) {
+  case AArch64::SUBWri:
+  case AArch64::SUBXri:
+    NeedsSubtract = true;
+    [[fallthrough]];
+  case AArch64::ADDWri:
+  case AArch64::ADDXri:
+    break;
+
+  default:
+    return false;
+  }
+
+  // The immediate in the 2nd instr must be "1".
+  if (!SecondMI.getOperand(2).isImm() || SecondMI.getOperand(2).getImm() != 1)
+    return false;
+
+  // The immediate must not be shifted: "#1, lsl #12" encodes 4096, not 1.
+  if (!SecondMI.getOperand(3).isImm() || SecondMI.getOperand(3).getImm() != 0)
+    return false;
+
+  // Assume the 1st instr to be a wildcard if it is unspecified.
+  if (FirstMI == nullptr)
+    return true;
+
+  // The 1st instr must be the unshifted two-register form of the same
+  // operation (add with add, subtract with subtract).
+  switch (FirstMI->getOpcode()) {
+  case AArch64::SUBWrs:
+  case AArch64::SUBXrs:
+    if (AArch64InstrInfo::hasShiftedReg(*FirstMI))
+      return false;
+    [[fallthrough]];
+  case AArch64::SUBWrr:
+  case AArch64::SUBXrr:
+    return NeedsSubtract;
+
+  case AArch64::ADDWrs:
+  case AArch64::ADDXrs:
+    if (AArch64InstrInfo::hasShiftedReg(*FirstMI))
+      return false;
+    [[fallthrough]];
+  case AArch64::ADDWrr:
+  case AArch64::ADDXrr:
+    return !NeedsSubtract;
+
+  default:
+    return false;
+  }
+}
+
 /// \brief Check if the instr pair, FirstMI and SecondMI, should be fused
 /// together. Given SecondMI, when FirstMI is unspecified, then check if
 /// SecondMI may be part of a fused pair at all.
@@ -411,6 +473,9 @@
     return true;
   if (ST.hasFuseArithmeticLogic() && isArithmeticLogicPair(FirstMI, SecondMI))
     return true;
+  if (ST.hasFuseAddSub2RegAndConstOne() &&
+      isAddSub2RegAndConstOnePair(FirstMI, SecondMI))
+    return true;
 
   return false;
 }
diff --git a/llvm/test/CodeGen/AArch64/macro-fusion-addsub-2reg-const1.mir b/llvm/test/CodeGen/AArch64/macro-fusion-addsub-2reg-const1.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/macro-fusion-addsub-2reg-const1.mir
@@ -0,0 +1,23 @@
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=+fuse-addsub-2reg-const1 -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,FUSION
+# RUN: llc -o - %s -mtriple=aarch64-- -mattr=-fuse-addsub-2reg-const1 -run-pass postmisched | FileCheck %s --check-prefixes=CHECK,NOFUSION
+---
+# CHECK-LABEL: name: addsub2reg
+# CHECK: $w8 = ADDWrr killed renamable $w0, killed renamable $w1
+# FUSION: renamable $w0 = ADDWri killed renamable $w8, 1, 0
+# CHECK: $w9 = SUBWrr killed renamable $w2, killed renamable $w3
+# NOFUSION: renamable $w0 = ADDWri killed renamable $w8, 1, 0
+# CHECK: renamable $w1 = SUBWri killed renamable $w9, 1, 0
+name: addsub2reg
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $w0, $w1, $w2, $w3
+
+    $w8 = ADDWrr killed renamable $w0, killed renamable $w1
+    $w9 = SUBWrr killed renamable $w2, killed renamable $w3
+    renamable $w0 = ADDWri killed renamable $w8, 1, 0
+    renamable $w1 = SUBWri killed renamable $w9, 1, 0
+
+    $w0 = ORRWrs killed renamable $w0, killed renamable $w1, 0
+    RET undef $lr, implicit $w0
+...