Index: lib/Target/AArch64/AArch64.td =================================================================== --- lib/Target/AArch64/AArch64.td +++ lib/Target/AArch64/AArch64.td @@ -156,6 +156,10 @@ "fuse-aes", "HasFuseAES", "true", "CPU fuses AES crypto operations">; +def FeatureFuseCryptoApple : SubtargetFeature< + "fuse-crypto", "HasFuseCryptoApple", "true", + "CPU fuses AES and PMULL crypto operations">; + def FeatureFuseCCSelect : SubtargetFeature< "fuse-csel", "HasFuseCCSelect", "true", "CPU fuses conditional select operations">; @@ -342,7 +346,7 @@ FeatureCrypto, FeatureDisableLatencySchedHeuristic, FeatureFPARMv8, - FeatureFuseAES, + FeatureFuseCryptoApple, FeatureNEON, FeaturePerfMon, FeatureZCRegMove, Index: lib/Target/AArch64/AArch64MacroFusion.cpp =================================================================== --- lib/Target/AArch64/AArch64MacroFusion.cpp +++ lib/Target/AArch64/AArch64MacroFusion.cpp @@ -123,6 +123,27 @@ return false; } +/// Crypto pairs: AESE+AESMC, AESD+AESIMC, or AESE/AESD/PMULL followed by EOR. +static bool isCryptoPair(unsigned FirstOpcode, unsigned SecondOpcode) { + switch (SecondOpcode) { + case AArch64::AESMCrr: + case AArch64::AESMCrrTied: + return FirstOpcode == AArch64::INSTRUCTION_LIST_END || + FirstOpcode == AArch64::AESErr; + case AArch64::AESIMCrr: + case AArch64::AESIMCrrTied: + return FirstOpcode == AArch64::INSTRUCTION_LIST_END || + FirstOpcode == AArch64::AESDrr; + case AArch64::EORv16i8: + return FirstOpcode == AArch64::INSTRUCTION_LIST_END || + FirstOpcode == AArch64::AESErr || + FirstOpcode == AArch64::AESDrr || + FirstOpcode == AArch64::PMULLv16i8 || + FirstOpcode == AArch64::PMULLv8i8; + } + return false; +} + /// Literal generation. 
static bool isLiteralsPair(unsigned FirstOpcode, unsigned SecondOpcode, const MachineInstr *FirstMI, @@ -258,6 +279,8 @@ return true; if (ST.hasFuseAES() && isAESPair(FirstOpc, SecondOpc)) return true; + if (ST.hasFuseCryptoApple() && isCryptoPair(FirstOpc, SecondOpc)) + return true; if (ST.hasFuseLiterals() && isLiteralsPair(FirstOpc, SecondOpc, FirstMI, SecondMI)) return true; Index: lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- lib/Target/AArch64/AArch64Subtarget.h +++ lib/Target/AArch64/AArch64Subtarget.h @@ -123,6 +123,7 @@ bool HasArithmeticCbzFusion = false; bool HasFuseAddress = false; bool HasFuseAES = false; + bool HasFuseCryptoApple = false; bool HasFuseCCSelect = false; bool HasFuseLiterals = false; bool DisableLatencySchedHeuristic = false; @@ -256,6 +257,7 @@ bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } bool hasFuseAddress() const { return HasFuseAddress; } bool hasFuseAES() const { return HasFuseAES; } + bool hasFuseCryptoApple() const { return HasFuseCryptoApple; } bool hasFuseCCSelect() const { return HasFuseCCSelect; } bool hasFuseLiterals() const { return HasFuseLiterals; } Index: test/CodeGen/AArch64/misched-fusion-aes.ll =================================================================== --- test/CodeGen/AArch64/misched-fusion-aes.ll +++ test/CodeGen/AArch64/misched-fusion-aes.ll @@ -1,4 +1,5 @@ ; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-aes,+crypto | FileCheck %s +; RUN: llc %s -o - -mtriple=aarch64-unknown -mattr=+fuse-crypto,+crypto | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=generic -mattr=+crypto | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a53 | FileCheck %s ; RUN: llc %s -o - -mtriple=aarch64-unknown -mcpu=cortex-a57 | FileCheck %s Index: test/CodeGen/AArch64/misched-fusion-crypto.mir =================================================================== --- /dev/null +++ 
test/CodeGen/AArch64/misched-fusion-crypto.mir @@ -0,0 +1,75 @@ +# RUN: llc -o /dev/null %s -run-pass=machine-scheduler -mtriple aarch64-- -mattr=-fuse-aes,+crypto -misched-print-dags 2>&1 | FileCheck %s --check-prefixes=CHECK,NOFUSE +# RUN: llc -o /dev/null %s -run-pass=machine-scheduler -mtriple aarch64-- -mattr=+fuse-aes,+crypto -misched-print-dags 2>&1 | FileCheck %s --check-prefixes=CHECK,FUSEAES +# RUN: llc -o /dev/null %s -run-pass=machine-scheduler -mtriple aarch64-- -mattr=+fuse-crypto,+crypto -misched-print-dags 2>&1 | FileCheck %s --check-prefixes=CHECK,FUSEAES,FUSECRYPTO +# REQUIRES: asserts + +name: func +body: | + bb.0: + ; CHECK: SU(0): %0:fpr128 = AESErr undef $q0, undef $q1 + ; CHECK: Successors: + ; NOFUSE-NOT: SU({{.*}}): Ord + ; FUSEAES: SU(1): Ord Latency=0 Cluster + ; CHECK: SU(1): %1:fpr128 = AESMCrrTied %0:fpr128 + %0:fpr128 = AESErr undef $q0, undef $q1 + %1:fpr128 = AESMCrrTied %0 + + ; CHECK: SU(2): %2:fpr128 = AESErr undef $q2, undef $q3 + ; CHECK: Successors: + ; NOFUSE-NOT: SU({{.*}}): Ord + ; FUSEAES: SU(3): Ord Latency=0 Cluster + ; CHECK: SU(3): dead %3:fpr128 = AESMCrr %2:fpr128 + %2:fpr128 = AESErr undef $q2, undef $q3 + %3:fpr128 = AESMCrr %2 + + ; CHECK: SU(4): %4:fpr128 = AESErr %1:fpr128, undef $q4 + ; CHECK: Successors: + ; NOFUSE-NOT: SU({{.*}}): Ord + ; FUSEAES-NOT: SU({{.*}}): Ord + ; FUSECRYPTO: SU(5): Ord Latency=0 Cluster + ; CHECK: SU(5): dead %5:fpr128 = EORv16i8 %4:fpr128, undef $q5 + %4:fpr128 = AESErr %1, undef $q4 + %5:fpr128 = EORv16i8 %4, undef $q5 + + ; CHECK: SU(6): %6:fpr128 = AESDrr undef $q0, undef $q1 + ; CHECK: Successors: + ; NOFUSE-NOT: SU({{.*}}): Ord + ; FUSEAES: SU(7): Ord Latency=0 Cluster + ; CHECK: SU(7): %7:fpr128 = AESIMCrrTied %6:fpr128 + %6:fpr128 = AESDrr undef $q0, undef $q1 + %7:fpr128 = AESIMCrrTied %6 + + ; CHECK: SU(8): %8:fpr128 = AESDrr undef $q2, undef $q3 + ; CHECK: Successors: + ; NOFUSE-NOT: SU({{.*}}): Ord + ; FUSEAES: SU(9): Ord Latency=0 Cluster + ; CHECK: SU(9): dead %9:fpr128 
= AESIMCrr %8:fpr128 + %8:fpr128 = AESDrr undef $q2, undef $q3 + %9:fpr128 = AESIMCrr %8 + + ; CHECK: SU(10): %10:fpr128 = AESDrr %7:fpr128, undef $q0 + ; CHECK: Successors: + ; NOFUSE-NOT: SU({{.*}}): Ord + ; FUSEAES-NOT: SU({{.*}}): Ord + ; FUSECRYPTO: SU(11): Ord Latency=0 Cluster + ; CHECK: SU(11): dead %11:fpr128 = EORv16i8 %10:fpr128, undef $q1 + %10:fpr128 = AESDrr %7, undef $q0 + %11:fpr128 = EORv16i8 %10, undef $q1 + + ; CHECK: SU(12): %12:fpr128 = PMULLv16i8 undef $q0, undef $q1 + ; CHECK: Successors: + ; NOFUSE-NOT: SU({{.*}}): Ord + ; FUSEAES-NOT: SU({{.*}}): Ord + ; FUSECRYPTO: SU(13): Ord Latency=0 Cluster + ; CHECK: SU(13): dead %13:fpr128 = EORv16i8 %12:fpr128, undef $q2 + %12:fpr128 = PMULLv16i8 undef $q0, undef $q1 + %13:fpr128 = EORv16i8 %12, undef $q2 + + ; CHECK: SU(14): %14:fpr128 = PMULLv8i8 undef $d0, undef $d1 + ; CHECK: Successors: + ; NOFUSE-NOT: SU({{.*}}): Ord + ; FUSEAES-NOT: SU({{.*}}): Ord + ; FUSECRYPTO: SU(15): Ord Latency=0 Cluster + ; CHECK: SU(15): dead %15:fpr128 = EORv16i8 %14:fpr128, undef $q3 + %14:fpr128 = PMULLv8i8 undef $d0, undef $d1 + %15:fpr128 = EORv16i8 %14, undef $q3