Index: llvm/lib/Target/AArch64/AArch64.td =================================================================== --- llvm/lib/Target/AArch64/AArch64.td +++ llvm/lib/Target/AArch64/AArch64.td @@ -103,10 +103,6 @@ "arith-cbz-fusion", "HasArithmeticCbzFusion", "true", "CPU fuses arithmetic + cbz/cbnz operations">; -def FeatureFuseAES : SubtargetFeature< - "fuse-aes", "HasFuseAES", "true", - "CPU fuses AES crypto operations">; - def FeatureFuseLiterals : SubtargetFeature< "fuse-literals", "HasFuseLiterals", "true", "CPU fuses literal generation operations">; @@ -190,7 +186,6 @@ FeatureCrypto, FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, - FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler, @@ -204,7 +199,6 @@ FeatureCrypto, FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, - FeatureFuseAES, FeatureFuseLiterals, FeatureNEON, FeaturePerfMon, @@ -217,9 +211,9 @@ FeatureCRC, FeatureCrypto, FeatureFPARMv8, - FeatureFuseAES, FeatureNEON, - FeaturePerfMon + FeaturePerfMon, + FeaturePostRAScheduler ]>; def ProcA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", @@ -227,9 +221,9 @@ FeatureCRC, FeatureCrypto, FeatureFPARMv8, - FeatureFuseAES, FeatureNEON, - FeaturePerfMon + FeaturePerfMon, + FeaturePostRAScheduler ]>; def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone", @@ -254,7 +248,6 @@ FeatureCrypto, FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, - FeatureFuseAES, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler, Index: llvm/lib/Target/AArch64/AArch64MacroFusion.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64MacroFusion.cpp +++ llvm/lib/Target/AArch64/AArch64MacroFusion.cpp @@ -126,19 +126,6 @@ return true; } - if (ST.hasFuseAES()) - // Fuse AES crypto operations. - switch(FirstOpcode) { - // AES encode. - case AArch64::AESErr: - return SecondOpcode == AArch64::AESMCrr || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; - // AES decode. - case AArch64::AESDrr: - return SecondOpcode == AArch64::AESIMCrr || - SecondOpcode == AArch64::INSTRUCTION_LIST_END; - } - if (ST.hasFuseLiterals()) // Fuse literal generation operations. switch (FirstOpcode) { Index: llvm/lib/Target/AArch64/AArch64SchedA53.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SchedA53.td +++ llvm/lib/Target/AArch64/AArch64SchedA53.td @@ -290,4 +290,12 @@ def : InstRW<[A53WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; def : InstRW<[A53WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +//--- +// Cryptography Extensions +//--- +// FIXME: resources and costs below were wildly guessed. +def A53ReadAES : SchedReadCluster<6, [WriteV]>; +def : InstRW<[WriteV], (instregex "^AES[DE]")>; +def : InstRW<[WriteV, A53ReadAES], (instregex "^AESI?MC")>; + } Index: llvm/lib/Target/AArch64/AArch64SchedA57.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SchedA57.td +++ llvm/lib/Target/AArch64/AArch64SchedA57.td @@ -162,7 +162,7 @@ // Cryptography Extensions // ----------------------------------------------------------------------------- -def A57ReadAES : SchedReadAdvance<3, [A57Write_3cyc_1W]>; +def A57ReadAES : SchedReadCluster<3, [A57Write_3cyc_1W]>; def : InstRW<[A57Write_3cyc_1W], (instregex "^AES[DE]")>; def : InstRW<[A57Write_3cyc_1W, A57ReadAES], (instregex "^AESI?MC")>; def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>; Index: llvm/lib/Target/AArch64/AArch64SchedM1.td =================================================================== --- llvm/lib/Target/AArch64/AArch64SchedM1.td +++ llvm/lib/Target/AArch64/AArch64SchedM1.td @@ -634,7 +634,7 @@ WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; def : InstRW<[M1WriteVSTC], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; def : InstRW<[M1WriteVSTC, - WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; + WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; def : InstRW<[M1WriteVSTD], (instregex "ST2i(8|16|32)$")>; def : InstRW<[M1WriteVSTD, @@ -695,7 +695,7 @@ // Cryptography instructions. def M1WriteAES : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; } -def M1ReadAES : SchedReadAdvance<1, [M1WriteAES]>; +def M1ReadAES : SchedReadCluster<1, [M1WriteAES]>; def : InstRW<[M1WriteAES], (instregex "^AES[DE]")>; def : InstRW<[M1WriteAES, M1ReadAES], (instregex "^AESI?MC")>; Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -96,7 +96,6 @@ bool UseAlternateSExtLoadCVTF32Pattern = false; bool HasArithmeticBccFusion = false; bool HasArithmeticCbzFusion = false; - bool HasFuseAES = false; bool HasFuseLiterals = false; bool DisableLatencySchedHeuristic = false; bool UseRSqrt = false; @@ -216,7 +215,6 @@ } bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; } bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; } - bool hasFuseAES() const { return HasFuseAES; } bool hasFuseLiterals() const { return HasFuseLiterals; } bool useRSqrt() const { return UseRSqrt; } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -278,7 +278,7 @@ ScheduleDAGInstrs * createPostMachineScheduler(MachineSchedContext *C) const override { const AArch64Subtarget &ST = C->MF->getSubtarget(); - if (ST.hasFuseAES() || ST.hasFuseLiterals()) { + if (ST.hasFuseLiterals()) { // Run the Macro Fusion after RA again since literals are expanded from // pseudos then (v. addPreSched2()). ScheduleDAGMI *DAG = createGenericSchedPostRA(C); Index: llvm/test/CodeGen/AArch64/misched-fusion-aes.ll =================================================================== --- llvm/test/CodeGen/AArch64/misched-fusion-aes.ll +++ llvm/test/CodeGen/AArch64/misched-fusion-aes.ll @@ -74,6 +74,7 @@ ret void ; CHECK-LABEL: aesea: + ; CHECKCORTEX: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} ; CHECKCORTEX-NEXT: aesmc {{v[0-7].16b}}, [[VA]] ; CHECKCORTEX: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} @@ -93,21 +94,21 @@ ; CHECKM1: aese [[VA:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VA]] -; CHECKM1: aese [[VH:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1: aese [[VB:v[0-7].16b]], {{v[0-7].16b}} -; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VB]] -; CHECKM1: aese {{v[0-7].16b}}, {{v[0-7].16b}} ; CHECKM1: aese [[VC:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VC]] +; CHECKM1: aese {{v[0-7].16b}}, {{v[0-7].16b}} ; CHECKM1: aese [[VD:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VD]] -; CHECKM1: aesmc {{v[0-7].16b}}, [[VH]] ; CHECKM1: aese [[VE:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VE]] +; CHECKM1: aesmc {{v[0-7].16b}}, [[VB]] ; CHECKM1: aese [[VF:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VF]] ; CHECKM1: aese [[VG:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VG]] +; CHECKM1: aese [[VH:v[0-7].16b]], {{v[0-7].16b}} +; CHECKM1-NEXT: aesmc {{v[0-7].16b}}, [[VH]] } define void @aesda(<16 x i8>* %a0, <16 x i8>* %b0, <16 x i8>* %c0, <16 x i8> %d, <16 x i8> %e) { @@ -175,6 +176,7 @@ ret void ; CHECK-LABEL: aesda: + ; CHECKCORTEX: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} ; CHECKCORTEX-NEXT: aesimc {{v[0-7].16b}}, [[VA]] ; CHECKCORTEX: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} @@ -194,21 +196,21 @@ ; CHECKM1: aesd [[VA:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VA]] -; CHECKM1: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1: aesd [[VB:v[0-7].16b]], {{v[0-7].16b}} -; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VB]] -; CHECKM1: aesd {{v[0-7].16b}}, {{v[0-7].16b}} ; CHECKM1: aesd [[VC:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VC]] +; CHECKM1: aesd {{v[0-7].16b}}, {{v[0-7].16b}} ; CHECKM1: aesd [[VD:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VD]] -; CHECKM1: aesimc {{v[0-7].16b}}, [[VH]] ; CHECKM1: aesd [[VE:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VE]] +; CHECKM1: aesimc {{v[0-7].16b}}, [[VB]] ; CHECKM1: aesd [[VF:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VF]] ; CHECKM1: aesd [[VG:v[0-7].16b]], {{v[0-7].16b}} ; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VG]] +; CHECKM1: aesd [[VH:v[0-7].16b]], {{v[0-7].16b}} +; CHECKM1-NEXT: aesimc {{v[0-7].16b}}, [[VH]] } define void @aes_load_store(<16 x i8> *%p1, <16 x i8> *%p2 , <16 x i8> *%p3) {