Patch summary (free text placed ahead of the first "Index:" header; it is not part of any hunk).
  * AArch64.td: defines the SubtargetFeature FeatureFuseAES ("fuse-aes", sets HasFuseAES to "true", described as "CPU fuses AES crypto operations") and adds FeatureFuseAES to two existing feature lists (their owning definitions are outside the hunk context).
  * AArch64MacroFusion.cpp: in a function whose signature is outside the hunk context, when ST.hasFuseAES() holds, returns true for FirstOpcode AESErr with SecondOpcode AESMCrr, and for FirstOpcode AESDrr with SecondOpcode AESIMCrr; a SecondOpcode of INSTRUCTION_LIST_END is accepted for either first opcode. All other cases fall through to the existing "return false".
  * AArch64SchedA57.td: replaces the single "^AES" InstRW entry with "^AES[DE]" and "^AESI?MC" entries; the latter additionally uses the new A57ReadAES, a SchedReadAdvance<3, [A57Write_3cyc_1W]>.
  * AArch64SchedM1.td: splits "^AES" the same way; only the "^AESI?MC" entry keeps M1ReadAES, the "^AES[DE]" entry uses M1WriteAES alone.
  * AArch64Subtarget.h: adds the member "bool HasFuseAES = false" and the accessor hasFuseAES().
NOTE(review): in this copy the patch's line breaks and indentation have been collapsed (each hunk runs together on one physical line), so it presumably will not apply as-is; regenerate it from the original revision before use.
Index: llvm/trunk/lib/Target/AArch64/AArch64.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64.td +++ llvm/trunk/lib/Target/AArch64/AArch64.td @@ -103,6 +103,10 @@ "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", "CPU fuses arithmetic + cbz/cbnz operations">; +def FeatureFuseAES : SubtargetFeature< + "fuse-aes", "HasFuseAES", "true", + "CPU fuses AES crypto operations">; + def FeatureDisableLatencySchedHeuristic : SubtargetFeature< "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", "Disable latency scheduling heuristic">; @@ -184,6 +188,7 @@ FeatureCrypto, FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler, @@ -230,6 +235,7 @@ FeatureCrypto, FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, + FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler, Index: llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -116,6 +116,19 @@ return true; } + if (ST.hasFuseAES()) + // Fuse AES crypto operations. + switch(FirstOpcode) { + // AES encode. + case AArch64::AESErr: + return SecondOpcode == AArch64::AESMCrr || + SecondOpcode == AArch64::INSTRUCTION_LIST_END; + // AES decode. 
+ case AArch64::AESDrr: + return SecondOpcode == AArch64::AESIMCrr || + SecondOpcode == AArch64::INSTRUCTION_LIST_END; + } + return false; } Index: llvm/trunk/lib/Target/AArch64/AArch64SchedA57.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64SchedA57.td +++ llvm/trunk/lib/Target/AArch64/AArch64SchedA57.td @@ -162,7 +162,9 @@ // Cryptography Extensions // ----------------------------------------------------------------------------- -def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>; +def A57ReadAES : SchedReadAdvance<3, [A57Write_3cyc_1W]>; +def : InstRW<[A57Write_3cyc_1W], (instregex "^AES[DE]")>; +def : InstRW<[A57Write_3cyc_1W, A57ReadAES], (instregex "^AESI?MC")>; def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>; def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>; def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>; Index: llvm/trunk/lib/Target/AArch64/AArch64SchedM1.td =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64SchedM1.td +++ llvm/trunk/lib/Target/AArch64/AArch64SchedM1.td @@ -366,7 +366,8 @@ // Cryptography instructions. 
def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; } def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>; -def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AES")>; +def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>; +def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>; def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>; def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>; Index: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h @@ -84,6 +84,7 @@ bool UseAlternateSExtLoadCVTF32Pattern = false; bool HasArithmeticBccFusion = false; bool HasArithmeticCbzFusion = false; + bool HasFuseAES = false; bool DisableLatencySchedHeuristic = false; bool UseRSqrt = false; uint8_t MaxInterleaveFactor = 2; @@ -197,6 +198,7 @@ } bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; } bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } + bool hasFuseAES() const { return HasFuseAES; } bool useRSqrt() const { return UseRSqrt; } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } unsigned getVectorInsertExtractBaseCost() const {