diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -218,6 +218,10 @@ "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", "CPU fuses arithmetic + cbz/cbnz operations">; +def FeatureCmpBccFusion : SubtargetFeature< + "cmp-bcc-fusion", "HasCmpBccFusion", "true", + "CPU fuses cmp+bcc operations">; + def FeatureFuseAddress : SubtargetFeature< "fuse-address", "HasFuseAddress", "true", "CPU fuses address generation and memory operations">; @@ -615,6 +619,9 @@ FeatureDotProd, FeatureFPARMv8, FeatureFullFP16, + FeatureFuseAddress, + FeatureFuseAES, + FeatureFuseLiterals, FeatureNEON, FeatureRAS, FeatureRCPC, @@ -627,6 +634,7 @@ FeatureCrypto, FeatureFPARMv8, FeatureFuseAES, + FeatureFuseLiterals, FeatureNEON, FeaturePerfMon ]>; @@ -658,6 +666,7 @@ "Cortex-A76 ARM processors", [ HasV8_2aOps, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeatureRCPC, FeatureCrypto, @@ -669,7 +678,9 @@ def ProcA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", "Cortex-A77 ARM processors", [ HasV8_2aOps, + FeatureCmpBccFusion, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeatureRCPC, FeatureCrypto, FeatureFullFP16, @@ -680,6 +691,7 @@ "CortexA78", "Cortex-A78 ARM processors", [ HasV8_2aOps, + FeatureCmpBccFusion, FeatureCrypto, FeatureFPARMv8, FeatureFuseAES, @@ -696,6 +708,7 @@ "CortexA78C", "Cortex-A78C ARM processors", [ HasV8_2aOps, + FeatureCmpBccFusion, FeatureCrypto, FeatureDotProd, FeatureFlagM, @@ -723,6 +736,7 @@ def ProcX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", "Cortex-X1 ARM processors", [ HasV8_2aOps, + FeatureCmpBccFusion, FeatureCrypto, FeatureFPARMv8, FeatureFuseAES, diff --git a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp --- a/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ b/llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -21,7 +21,7 @@ /// CMN, CMP, TST followed by 
Bcc static bool isArithmeticBccPair(const MachineInstr *FirstMI, - const MachineInstr &SecondMI) { + const MachineInstr &SecondMI, bool CmpOnly) { if (SecondMI.getOpcode() != AArch64::Bcc) return false; @@ -29,6 +29,15 @@ if (FirstMI == nullptr) return true; + // If we're in CmpOnly mode, we only fuse arithmetic instructions that + // discard their result. Only inspect operand 0 when it is a register: + // MachineOperand::getReg() asserts on non-register operands. + if (CmpOnly && FirstMI->getOperand(0).isReg() && + !(FirstMI->getOperand(0).getReg() == AArch64::XZR || + FirstMI->getOperand(0).getReg() == AArch64::WZR)) { + return false; + } + switch (FirstMI->getOpcode()) { case AArch64::ADDSWri: case AArch64::ADDSWrr: @@ -380,8 +389,11 @@ // All checking functions assume that the 1st instr is a wildcard if it is // unspecified. - if (ST.hasArithmeticBccFusion() && isArithmeticBccPair(FirstMI, SecondMI)) - return true; + if (ST.hasCmpBccFusion() || ST.hasArithmeticBccFusion()) { + bool CmpOnly = !ST.hasArithmeticBccFusion(); + if (isArithmeticBccPair(FirstMI, SecondMI, CmpOnly)) + return true; + } if (ST.hasArithmeticCbzFusion() && isArithmeticCbzPair(FirstMI, SecondMI)) return true; if (ST.hasFuseAES() && isAESPair(FirstMI, SecondMI)) diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -221,6 +221,7 @@ bool UseAlternateSExtLoadCVTF32Pattern = false; bool HasArithmeticBccFusion = false; bool HasArithmeticCbzFusion = false; + bool HasCmpBccFusion = false; bool HasFuseAddress = false; bool HasFuseAES = false; bool HasFuseArithmeticLogic = false; @@ -377,6 +378,7 @@ } bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; } bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } + bool hasCmpBccFusion() const { return HasCmpBccFusion; } bool hasFuseAddress() const { return HasFuseAddress; } bool hasFuseAES() const { return HasFuseAES; } bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; } diff --git 
a/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll --- a/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-addr.ll @@ -1,4 +1,5 @@ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=fuse-address | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll --- a/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-aes.ll @@ -2,8 +2,14 @@ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a73 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a76 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a77 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78 | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a78c| FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-x1 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll 
b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll --- a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll @@ -1,6 +1,8 @@ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=-fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKDONT ; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-literals | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a65 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE +; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a72 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m3 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m4 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=exynos-m5 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECKFUSE