Index: llvm/include/llvm/CodeGen/TargetSubtargetInfo.h =================================================================== --- llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -206,6 +206,10 @@ /// which is the preferred way to influence this. virtual bool enablePostRAScheduler() const; + /// True if the subtarget should run a machine scheduler after register + /// allocation. + virtual bool enablePostRAMachineScheduler() const; + /// True if the subtarget should run the atomic expansion pass. virtual bool enableAtomicExpand() const; Index: llvm/lib/CodeGen/MachineScheduler.cpp =================================================================== --- llvm/lib/CodeGen/MachineScheduler.cpp +++ llvm/lib/CodeGen/MachineScheduler.cpp @@ -402,7 +402,7 @@ if (EnablePostRAMachineSched.getNumOccurrences()) { if (!EnablePostRAMachineSched) return false; - } else if (!mf.getSubtarget().enablePostRAScheduler()) { + } else if (!mf.getSubtarget().enablePostRAMachineScheduler()) { LLVM_DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); return false; } Index: llvm/lib/CodeGen/TargetSubtargetInfo.cpp =================================================================== --- llvm/lib/CodeGen/TargetSubtargetInfo.cpp +++ llvm/lib/CodeGen/TargetSubtargetInfo.cpp @@ -54,6 +54,10 @@ return getSchedModel().PostRAScheduler; } +bool TargetSubtargetInfo::enablePostRAMachineScheduler() const { + return enableMachineScheduler() && enablePostRAScheduler(); +} + bool TargetSubtargetInfo::useAA() const { return false; } Index: llvm/lib/Target/ARM/ARMSubtarget.h =================================================================== --- llvm/lib/Target/ARM/ARMSubtarget.h +++ llvm/lib/Target/ARM/ARMSubtarget.h @@ -806,6 +806,9 @@ /// True for some subtargets at > -O0. bool enablePostRAScheduler() const override; + /// True for some subtargets at > -O0. 
+ bool enablePostRAMachineScheduler() const override; + /// Enable use of alias analysis during code generation (during MI /// scheduling, DAGCombine, etc.). bool useAA() const override { return UseAA; } Index: llvm/lib/Target/ARM/ARMSubtarget.cpp =================================================================== --- llvm/lib/Target/ARM/ARMSubtarget.cpp +++ llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -381,9 +381,19 @@ // This overrides the PostRAScheduler bit in the SchedModel for any CPU. bool ARMSubtarget::enablePostRAScheduler() const { + if (enableMachineScheduler()) + return false; + if (disablePostRAScheduler()) + return false; + // Thumb1 cores will generally not benefit from post-ra scheduling + return !isThumb1Only(); +} + +bool ARMSubtarget::enablePostRAMachineScheduler() const { + if (!enableMachineScheduler()) + return false; if (disablePostRAScheduler()) return false; - // Don't reschedule potential IT blocks. return !isThumb1Only(); } Index: llvm/lib/Target/ARM/ARMTargetMachine.h =================================================================== --- llvm/lib/Target/ARM/ARMTargetMachine.h +++ llvm/lib/Target/ARM/ARMTargetMachine.h @@ -70,6 +70,8 @@ TargetTriple.isOSWindows() || TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16; } + + bool targetSchedulesPostRAScheduling() const override { return true; }; }; /// ARM/Thumb little endian target machine. 
Index: llvm/lib/Target/ARM/ARMTargetMachine.cpp =================================================================== --- llvm/lib/Target/ARM/ARMTargetMachine.cpp +++ llvm/lib/Target/ARM/ARMTargetMachine.cpp @@ -322,14 +322,7 @@ class ARMPassConfig : public TargetPassConfig { public: ARMPassConfig(ARMBaseTargetMachine &TM, PassManagerBase &PM) - : TargetPassConfig(TM, PM) { - if (TM.getOptLevel() != CodeGenOpt::None) { - ARMGenSubtargetInfo STI(TM.getTargetTriple(), TM.getTargetCPU(), - TM.getTargetFeatureString()); - if (STI.hasFeature(ARM::FeatureUseMISched)) - substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); - } - } + : TargetPassConfig(TM, PM) {} ARMBaseTargetMachine &getARMTargetMachine() const { return getTM(); @@ -523,6 +516,11 @@ } addPass(createMVEVPTBlockPass()); addPass(createThumb2ITBlockPass()); + + // Add both scheduling passes to give the subtarget an opportunity to pick + // between them. + addPass(&PostMachineSchedulerID); + addPass(&PostRASchedulerID); } void ARMPassConfig::addPreEmitPass() { Index: llvm/test/CodeGen/ARM/O3-pipeline.ll =================================================================== --- llvm/test/CodeGen/ARM/O3-pipeline.ll +++ llvm/test/CodeGen/ARM/O3-pipeline.ll @@ -138,6 +138,7 @@ ; CHECK-NEXT: Thumb IT blocks insertion pass ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction +; CHECK-NEXT: PostRA Machine Instruction Scheduler ; CHECK-NEXT: Post RA top-down list latency scheduler ; CHECK-NEXT: Analyze Machine Code For Garbage Collection ; CHECK-NEXT: Machine Block Frequency Analysis Index: llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched 
-debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; @a = global i32 0, align 4 Index: llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; CHECK: ********** MI Scheduling ********** ; We need second, post-ra scheduling to have LDM instruction combined from single-loads Index: llvm/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; N=3 STMIA_UPD should have latency 2cyc and writeback latency 1cyc ; CHECK: ********** MI Scheduling ********** Index: llvm/test/CodeGen/ARM/cortex-a57-misched-stm.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-stm.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-stm.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s 
-mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; N=3 STMIB should have latency 2cyc ; CHECK: ********** MI Scheduling ********** Index: llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; @a = global double 0.0, align 4 Index: llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; CHECK: ********** MI Scheduling ********** ; We need second, post-ra scheduling to have VLDM instruction combined from single-loads Index: llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll +++ 
llvm/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; CHECK: ********** MI Scheduling ********** ; We need second, post-ra scheduling to have VSTM instruction combined from single-stores Index: llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll =================================================================== --- llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll +++ llvm/test/CodeGen/ARM/cortex-a57-misched-vstm.ll @@ -1,5 +1,5 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -mattr=use-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s ; CHECK: ********** MI Scheduling ********** ; We need second, post-ra scheduling to have VSTM instruction combined from single-stores Index: llvm/test/CodeGen/ARM/postrasched.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/ARM/postrasched.ll @@ -0,0 +1,30 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=thumbv8m.main-none-eabi -debug-only=machine-scheduler,post-RA-sched -print-before=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s + +; CHECK-LABEL: test_misched +; Pre and post ra machine scheduling +; CHECK: ********** MI Scheduling ********** +; CHECK: t2LDRi12 +; CHECK: Latency : 2 +; CHECK: ********** MI Scheduling ********** +; CHECK: t2LDRi12 +; CHECK: Latency : 2 + +define i32 @test_misched(i32* %ptr) "target-cpu"="cortex-m33" { +entry: + %l = load i32, i32* %ptr + 
store i32 0, i32* %ptr + ret i32 %l +} + +; CHECK-LABEL: test_rasched +; CHECK: Subtarget disables post-MI-sched. +; CHECK: ********** List Scheduling ********** + +define i32 @test_rasched(i32* %ptr) { +entry: + %l = load i32, i32* %ptr + store i32 0, i32* %ptr + ret i32 %l +} +