diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.h b/llvm/lib/Target/AArch64/AArch64MachineScheduler.h
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.h
@@ -0,0 +1,38 @@
+//===- AArch64MachineScheduler.h - Custom AArch64 MI scheduler --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom AArch64 MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// A MachineSchedStrategy implementation for AArch64 post RA scheduling.
+///
+/// Behaves like PostGenericScheduler except for the candidate comparison,
+/// which is overridden by tryCandidate().
+class AArch64PostRASchedStrategy : public PostGenericScheduler {
+public:
+  AArch64PostRASchedStrategy(const MachineSchedContext *C)
+      : PostGenericScheduler(C) {}
+
+protected:
+  /// Compares \p TryCand against the current best \p Cand; returns true if
+  /// \p TryCand should be picked instead.
+  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override;
+};
+
+} // end namespace llvm
+
+#endif
+
diff --git a/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
@@ -0,0 +1,56 @@
+//===- AArch64MachineScheduler.cpp - MI Scheduler for AArch64 -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MachineScheduler.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+/// Decides whether \p TryCand should replace \p Cand as the best candidate.
+///
+/// The PostGenericScheduler heuristic always runs first, and its verdict is
+/// returned unchanged unless \p Cand is valid and both candidates are STPQi
+/// instructions with identical base operands (operand 2) whose immediate
+/// offsets (operand 3) differ by at least two. In that case the reason is
+/// recorded as NodeOrder and the store with the smaller offset wins, so such
+/// stores are ordered by ascending offset.
+bool AArch64PostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+                                              SchedCandidate &TryCand) {
+  bool OriginalResult = PostGenericScheduler::tryCandidate(Cand, TryCand);
+  if (!Cand.isValid())
+    return OriginalResult;
+
+  const MachineInstr *TryMI = TryCand.SU->getInstr();
+  const MachineInstr *CandMI = Cand.SU->getInstr();
+  // Only a pair of STPQi stores is reordered here.
+  if (!TryMI || !CandMI || TryMI->getOpcode() != AArch64::STPQi ||
+      CandMI->getOpcode() != AArch64::STPQi)
+    return OriginalResult;
+
+  // getImm() requires an immediate operand, so fall back to the generic
+  // result when either offset has any other form.
+  const MachineOperand &TryOff = TryMI->getOperand(3);
+  const MachineOperand &CandOff = CandMI->getOperand(3);
+  if (!TryOff.isImm() || !CandOff.isImm())
+    return OriginalResult;
+
+  if (!TryMI->getOperand(2).isIdenticalTo(CandMI->getOperand(2)))
+    return OriginalResult;
+
+  int64_t Off0 = TryOff.getImm();
+  int64_t Off1 = CandOff.getImm();
+  // Offsets less than two apart are treated as overlapping writes
+  // (presumably the immediate is in Q-register units; TODO confirm).
+  if (std::abs(Off0 - Off1) < 2)
+    return OriginalResult;
+
+  // Same base address and non-overlapping writes: use ascending offsets.
+  TryCand.Reason = NodeOrder;
+  return Off0 < Off1;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -12,6 +12,7 @@
 #include "AArch64TargetMachine.h"
 #include "AArch64.h"
 #include "AArch64MachineFunctionInfo.h"
+#include "AArch64MachineScheduler.h"
 #include "AArch64MacroFusion.h"
 #include "AArch64Subtarget.h"
 #include "AArch64TargetObjectFile.h"
@@ -474,15 +475,17 @@
   ScheduleDAGInstrs *
   createPostMachineScheduler(MachineSchedContext *C) const override {
     const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+    ScheduleDAGMI *DAG =
+        new ScheduleDAGMI(C, std::make_unique<AArch64PostRASchedStrategy>(C),
+                          /* RemoveKillFlags=*/true);
     if (ST.hasFusion()) {
       // Run the Macro Fusion after RA again since literals are expanded from
       // pseudos then (v. addPreSched2()).
-      ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
       DAG->addMutation(createAArch64MacroFusionDAGMutation());
       return DAG;
     }
 
-    return nullptr;
+    return DAG;
   }
 
   void addIRPasses() override;
diff --git a/llvm/lib/Target/AArch64/CMakeLists.txt b/llvm/lib/Target/AArch64/CMakeLists.txt
--- a/llvm/lib/Target/AArch64/CMakeLists.txt
+++ b/llvm/lib/Target/AArch64/CMakeLists.txt
@@ -65,6 +65,7 @@
   AArch64LoadStoreOptimizer.cpp
   AArch64LowerHomogeneousPrologEpilog.cpp
   AArch64MachineFunctionInfo.cpp
+  AArch64MachineScheduler.cpp
   AArch64MacroFusion.cpp
   AArch64MIPeepholeOpt.cpp
   AArch64MCInstLower.cpp
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
@@ -62,11 +62,11 @@
 ; CHECK-NEXT: mov x24, x5
 ; CHECK-NEXT: mov x25, x6
 ; CHECK-NEXT: mov x26, x7
-; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill
+;
CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill ; CHECK-NEXT: mov x27, x8 -; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill -; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill +; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill +; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill ; CHECK-NEXT: bl _puts ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload ; CHECK-NEXT: mov w0, w19 @@ -132,11 +132,11 @@ ; CHECK-NEXT: mov x24, x5 ; CHECK-NEXT: mov x25, x6 ; CHECK-NEXT: mov x26, x7 -; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill +; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill ; CHECK-NEXT: mov x27, x8 -; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill -; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill +; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill +; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill ; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: bl _get_f ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll --- a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll +++ b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll @@ -53,8 +53,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: str xzr, [x8, #64] -; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: stp q0, q0, [x8] +; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: ret ret [ 9 x double ] zeroinitializer } @@ -232,8 +232,8 @@ ; CHECK-LABEL: array_of_struct_in_memory: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: stp q0, q0, [x8, #16] +; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret ret [ 5 x %T_STRUCT_SAMEM ] zeroinitializer @@ 
-350,8 +350,8 @@ ; CHECK-LABEL: array_of_struct_nested_same_field_types_2: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: stp q0, q0, [x8, #16] +; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret ret [ 2 x %T_NESTED_STRUCT_SAMEM ] zeroinitializer @@ -440,8 +440,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: str xzr, [x8, #64] -; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: stp q0, q0, [x8] +; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: ret ret %T_IN_MEMORY zeroinitializer } diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll --- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -52,8 +52,8 @@ ; CHECK-LABEL: bzero_64_heap: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: stp q0, q0, [x0] +; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: ret call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 64, i1 false) ret void @@ -230,8 +230,8 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 @@ -253,8 +253,8 @@ ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -275,10 +275,10 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, 
#96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #144 @@ -300,14 +300,14 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #224] -; CHECK-NEXT: stp q0, q0, [sp, #192] -; CHECK-NEXT: stp q0, q0, [sp, #160] -; CHECK-NEXT: stp q0, q0, [sp, #128] -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] +; CHECK-NEXT: stp q0, q0, [sp, #128] +; CHECK-NEXT: stp q0, q0, [sp, #160] +; CHECK-NEXT: stp q0, q0, [sp, #192] +; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #272 @@ -497,8 +497,8 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 @@ -521,8 +521,8 @@ ; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str x8, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -543,10 +543,10 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, 
#64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #144 @@ -568,14 +568,14 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #224] -; CHECK-NEXT: stp q0, q0, [sp, #192] -; CHECK-NEXT: stp q0, q0, [sp, #160] -; CHECK-NEXT: stp q0, q0, [sp, #128] -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] +; CHECK-NEXT: stp q0, q0, [sp, #128] +; CHECK-NEXT: stp q0, q0, [sp, #160] +; CHECK-NEXT: stp q0, q0, [sp, #192] +; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #272 diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -152,15 +152,11 @@ ; CHECK-NEXT: stp q13, q12, [x8] ; CHECK-NEXT: stp q11, q10, [x8, #32] ; CHECK-NEXT: stp q9, q8, [x8, #64] -; CHECK-NEXT: stp q4, q15, [x8, #432] -; CHECK-NEXT: stp q14, q3, [x8, #464] -; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: stp q31, q30, [x8, #96] ; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: stp q29, q28, [x8, #144] +; CHECK-NEXT: stp q31, q30, [x8, #96] ; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: stp q29, q28, [x8, #144] ; CHECK-NEXT: stp q27, q26, [x8, #176] -; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte 
Folded Reload ; CHECK-NEXT: str q25, [x8, #208] ; CHECK-NEXT: stp q24, q23, [x8, #240] ; CHECK-NEXT: stp q22, q21, [x8, #272] @@ -168,7 +164,11 @@ ; CHECK-NEXT: stp q18, q17, [x8, #336] ; CHECK-NEXT: stp q16, q7, [x8, #368] ; CHECK-NEXT: stp q6, q5, [x8, #400] +; CHECK-NEXT: stp q4, q15, [x8, #432] +; CHECK-NEXT: stp q14, q3, [x8, #464] +; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: str q2, [x8, #496] +; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore b8 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll @@ -249,9 +249,9 @@ ; NO_SVE-NEXT: ldr q26, [x1, #32] ; NO_SVE-NEXT: ldr q27, [x1, #16] ; NO_SVE-NEXT: ldr q11, [x1] +; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: stp q1, q0, [x0, #224] ; NO_SVE-NEXT: mov v0.16b, v8.16b -; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: mov v1.16b, v8.16b ; NO_SVE-NEXT: mov v2.16b, v8.16b ; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b @@ -531,9 +531,9 @@ ; NO_SVE-NEXT: ldr q26, [x1, #32] ; NO_SVE-NEXT: ldr q27, [x1, #16] ; NO_SVE-NEXT: ldr q11, [x1] +; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: stp q1, q0, [x0, #224] ; NO_SVE-NEXT: mov v0.16b, v8.16b -; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: mov v1.16b, v8.16b ; NO_SVE-NEXT: mov v2.16b, v8.16b ; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b @@ -813,9 +813,9 @@ ; NO_SVE-NEXT: ldr q26, [x1, #32] ; NO_SVE-NEXT: ldr q27, [x1, #16] ; NO_SVE-NEXT: ldr q11, [x1] +; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: stp q1, q0, [x0, #224] ; NO_SVE-NEXT: mov v0.16b, v8.16b -; NO_SVE-NEXT: stp q3, q2, [x0, #192] ; NO_SVE-NEXT: mov v1.16b, v8.16b ; NO_SVE-NEXT: mov v2.16b, v8.16b ; NO_SVE-NEXT: bsl v0.16b, v5.16b, v29.16b diff --git 
a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn @@ -131,6 +131,7 @@ "AArch64MCInstLower.cpp", "AArch64MIPeepholeOpt.cpp", "AArch64MachineFunctionInfo.cpp", + "AArch64MachineScheduler.cpp", "AArch64MacroFusion.cpp", "AArch64PBQPRegAlloc.cpp", "AArch64PromoteConstant.cpp",