Index: llvm/lib/Target/AArch64/AArch64MachineScheduler.h
===================================================================
--- /dev/null
+++ llvm/lib/Target/AArch64/AArch64MachineScheduler.h
@@ -0,0 +1,40 @@
+//===- AArch64MachineScheduler.h - Custom AArch64 MI scheduler --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom AArch64 MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+/// A MachineSchedStrategy implementation for AArch64 post RA scheduling.
+///
+/// Defers to PostGenericScheduler for every decision except one: when both
+/// candidates are STPQi stores with an identical base operand and immediate
+/// offsets at least 2 apart, they are ordered by ascending offset.
+class AArch64PostRASchedStrategy : public PostGenericScheduler {
+public:
+  /// Forwards the scheduling context \p C to PostGenericScheduler.
+  AArch64PostRASchedStrategy(const MachineSchedContext *C)
+      : PostGenericScheduler(C) {}
+
+protected:
+  /// Runs PostGenericScheduler::tryCandidate and returns its result unless
+  /// the STPQi rule above applies, in which case it returns whether
+  /// \p TryCand has the lower offset.
+  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) override;
+};
+
+} // end namespace llvm
+
+#endif
Index: llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/AArch64/AArch64MachineScheduler.cpp
@@ -0,0 +1,43 @@
+//===- AArch64MachineScheduler.cpp - MI Scheduler for AArch64 -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AArch64MachineScheduler.h"
+#include "MCTargetDesc/AArch64MCTargetDesc.h"
+#include <cstdlib>
+
+using namespace llvm;
+
+// Applies the generic post-RA heuristics, then overrides them for a pair of
+// STPQi stores off the same base so that they are ordered by ascending offset.
+bool AArch64PostRASchedStrategy::tryCandidate(SchedCandidate &Cand,
+                                              SchedCandidate &TryCand) {
+  // Computed unconditionally; returned unless the STPQi rule below fires.
+  bool OriginalResult = PostGenericScheduler::tryCandidate(Cand, TryCand);
+
+  if (Cand.isValid()) {
+    MachineInstr *Instr0 = TryCand.SU->getInstr();
+    MachineInstr *Instr1 = Cand.SU->getInstr();
+    if (Instr0 && Instr1 && Instr0->getOpcode() == Instr1->getOpcode() &&
+        Instr0->getOpcode() == AArch64::STPQi) {
+      // Operand 2 is compared as the base, operand 3 is read as the offset.
+      MachineOperand &Base0 = Instr0->getOperand(2);
+      MachineOperand &Base1 = Instr1->getOperand(2);
+      int64_t Off0 = Instr0->getOperand(3).getImm();
+      int64_t Off1 = Instr1->getOperand(3).getImm();
+      // NOTE(review): the ">= 2" distance presumably rules out overlapping
+      // stores (each STPQi writing two offset units) -- confirm.
+      if (Base0.isIdenticalTo(Base1) && std::abs(Off0 - Off1) >= 2) {
+        TryCand.Reason = NodeOrder;
+        // Off0 belongs to TryCand: report true only if it is the lower one.
+        return Off0 < Off1;
+      }
+    }
+  }
+
+  return OriginalResult;
+}
Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -12,6 +12,7 @@
 #include "AArch64TargetMachine.h"
 #include "AArch64.h"
 #include "AArch64MachineFunctionInfo.h"
+#include "AArch64MachineScheduler.h"
 #include "AArch64MacroFusion.h"
 #include "AArch64Subtarget.h"
 #include "AArch64TargetObjectFile.h"
@@ -474,10 +475,14 @@
   ScheduleDAGInstrs *
   createPostMachineScheduler(MachineSchedContext *C) const override {
     const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
+    // NOTE(review): DAG is now allocated unconditionally; confirm that the
+    // path taken when hasFusion() is false returns it rather than leaking it.
+    ScheduleDAGMI *DAG =
+        new ScheduleDAGMI(C, std::make_unique<AArch64PostRASchedStrategy>(C),
+                          /* RemoveKillFlags=*/true);
     if (ST.hasFusion()) {
       // Run the Macro Fusion after RA again since literals are expanded from
       // pseudos then (v. addPreSched2()).
- ScheduleDAGMI *DAG = createGenericSchedPostRA(C); DAG->addMutation(createAArch64MacroFusionDAGMutation()); return DAG; } Index: llvm/lib/Target/AArch64/CMakeLists.txt =================================================================== --- llvm/lib/Target/AArch64/CMakeLists.txt +++ llvm/lib/Target/AArch64/CMakeLists.txt @@ -65,6 +65,7 @@ AArch64LoadStoreOptimizer.cpp AArch64LowerHomogeneousPrologEpilog.cpp AArch64MachineFunctionInfo.cpp + AArch64MachineScheduler.cpp AArch64MacroFusion.cpp AArch64MIPeepholeOpt.cpp AArch64MCInstLower.cpp Index: llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll +++ llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll @@ -62,11 +62,11 @@ ; CHECK-NEXT: mov x24, x5 ; CHECK-NEXT: mov x25, x6 ; CHECK-NEXT: mov x26, x7 -; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill +; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill ; CHECK-NEXT: mov x27, x8 -; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill -; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill +; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill +; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill ; CHECK-NEXT: bl _puts ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload ; CHECK-NEXT: mov w0, w19 @@ -132,11 +132,11 @@ ; CHECK-NEXT: mov x24, x5 ; CHECK-NEXT: mov x25, x6 ; CHECK-NEXT: mov x26, x7 -; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill +; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill ; CHECK-NEXT: mov x27, x8 -; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill -; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill +; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill +; CHECK-NEXT: stp q1, q0, [sp, 
#96] ; 32-byte Folded Spill ; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: bl _get_f ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload Index: llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll =================================================================== --- llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll +++ llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll @@ -53,8 +53,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: str xzr, [x8, #64] -; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: stp q0, q0, [x8] +; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: ret ret [ 9 x double ] zeroinitializer } @@ -232,8 +232,8 @@ ; CHECK-LABEL: array_of_struct_in_memory: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: stp q0, q0, [x8, #16] +; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret ret [ 5 x %T_STRUCT_SAMEM ] zeroinitializer @@ -350,8 +350,8 @@ ; CHECK-LABEL: array_of_struct_nested_same_field_types_2: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: stp q0, q0, [x8, #16] +; CHECK-NEXT: stp q0, q0, [x8, #48] ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret ret [ 2 x %T_NESTED_STRUCT_SAMEM ] zeroinitializer @@ -440,8 +440,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: str xzr, [x8, #64] -; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: stp q0, q0, [x8] +; CHECK-NEXT: stp q0, q0, [x8, #32] ; CHECK-NEXT: ret ret %T_IN_MEMORY zeroinitializer } Index: llvm/test/CodeGen/AArch64/arm64-memset-inline.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -52,8 +52,8 @@ ; CHECK-LABEL: bzero_64_heap: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: stp q0, q0, [x0, 
#32] ; CHECK-NEXT: stp q0, q0, [x0] +; CHECK-NEXT: stp q0, q0, [x0, #32] ; CHECK-NEXT: ret call void @llvm.memset.p0i8.i64(i8* align 8 %c, i8 0, i64 64, i1 false) ret void @@ -230,8 +230,8 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 @@ -253,8 +253,8 @@ ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -275,10 +275,10 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #144 @@ -300,14 +300,14 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #224] -; CHECK-NEXT: stp q0, q0, [sp, #192] -; CHECK-NEXT: stp q0, q0, [sp, #160] -; CHECK-NEXT: stp q0, q0, [sp, #128] -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] +; CHECK-NEXT: stp q0, q0, [sp, #128] +; CHECK-NEXT: stp q0, q0, [sp, #160] +; CHECK-NEXT: stp q0, q0, [sp, #192] +; 
CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #272 @@ -497,8 +497,8 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 @@ -521,8 +521,8 @@ ; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str x8, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 @@ -543,10 +543,10 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #128] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #144 @@ -568,14 +568,14 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: stp q0, q0, [sp, #224] -; CHECK-NEXT: stp q0, q0, [sp, #192] -; CHECK-NEXT: stp q0, q0, [sp, #160] -; CHECK-NEXT: stp q0, q0, [sp, #128] -; CHECK-NEXT: stp q0, q0, [sp, #96] -; CHECK-NEXT: stp q0, q0, [sp, #64] -; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: stp q0, q0, [sp, #64] +; CHECK-NEXT: stp q0, q0, [sp, #96] +; CHECK-NEXT: stp q0, q0, [sp, #128] +; CHECK-NEXT: stp q0, q0, [sp, #160] +; CHECK-NEXT: stp q0, q0, [sp, #192] +; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: bl 
something ; CHECK-NEXT: ldp x29, x30, [sp, #256] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #272 Index: llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll =================================================================== --- llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -152,15 +152,11 @@ ; CHECK-NEXT: stp q13, q12, [x8] ; CHECK-NEXT: stp q11, q10, [x8, #32] ; CHECK-NEXT: stp q9, q8, [x8, #64] -; CHECK-NEXT: stp q4, q15, [x8, #432] -; CHECK-NEXT: stp q14, q3, [x8, #464] -; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: stp q31, q30, [x8, #96] ; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: stp q29, q28, [x8, #144] +; CHECK-NEXT: stp q31, q30, [x8, #96] ; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: stp q29, q28, [x8, #144] ; CHECK-NEXT: stp q27, q26, [x8, #176] -; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: str q25, [x8, #208] ; CHECK-NEXT: stp q24, q23, [x8, #240] ; CHECK-NEXT: stp q22, q21, [x8, #272] @@ -168,7 +164,11 @@ ; CHECK-NEXT: stp q18, q17, [x8, #336] ; CHECK-NEXT: stp q16, q7, [x8, #368] ; CHECK-NEXT: stp q6, q5, [x8, #400] +; CHECK-NEXT: stp q4, q15, [x8, #432] +; CHECK-NEXT: stp q14, q3, [x8, #464] +; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: str q2, [x8, #496] +; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret ; CH`ECK-NEXT: .cfi_offset b9, -16 Index: llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn +++ llvm/utils/gn/secondary/llvm/lib/Target/AArch64/BUILD.gn @@ -131,6 +131,7 @@ "AArch64MCInstLower.cpp", "AArch64MIPeepholeOpt.cpp", "AArch64MachineFunctionInfo.cpp", + "AArch64MachineScheduler.cpp", 
"AArch64MacroFusion.cpp", "AArch64PBQPRegAlloc.cpp", "AArch64PromoteConstant.cpp",