diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -61,6 +61,7 @@ class TargetSchedModel; class TargetSubtargetInfo; enum class MachineCombinerPattern; +enum class MachineTraceStrategy; template class SmallVectorImpl; @@ -1251,6 +1252,9 @@ /// Return true when a target supports MachineCombiner. virtual bool useMachineCombiner() const { return false; } + /// Return a strategy that MachineCombiner must use when creating traces. + virtual MachineTraceStrategy getMachineCombinerTraceStrategy() const; + /// Return true if the given SDNode can be copied during scheduling /// even if it has glue. virtual bool canCopyGluedNodeDuringSchedule(SDNode *N) const { return false; } diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -95,7 +95,8 @@ bool isTransientMI(const MachineInstr *MI); unsigned getDepth(SmallVectorImpl &InsInstrs, DenseMap &InstrIdxForVirtReg, - MachineTraceMetrics::Trace BlockTrace); + MachineTraceMetrics::Trace BlockTrace, + const MachineBasicBlock &MBB); unsigned getLatency(MachineInstr *Root, MachineInstr *NewRoot, MachineTraceMetrics::Trace BlockTrace); bool @@ -207,7 +208,8 @@ unsigned MachineCombiner::getDepth(SmallVectorImpl &InsInstrs, DenseMap &InstrIdxForVirtReg, - MachineTraceMetrics::Trace BlockTrace) { + MachineTraceMetrics::Trace BlockTrace, + const MachineBasicBlock &MBB) { SmallVector InstrDepth; // For each instruction in the new sequence compute the depth based on the // operands. Use the trace information when possible. For new operands which @@ -237,7 +239,9 @@ InstrPtr, UseIdx); } else { MachineInstr *DefInstr = getOperandDef(MO); - if (DefInstr) { + if (DefInstr && (TII->getMachineCombinerTraceStrategy() != + MachineTraceStrategy::TS_Local || + DefInstr->getParent() == &MBB)) { DepthOp = BlockTrace.getInstrCycles(*DefInstr).Depth; if (!isTransientMI(DefInstr)) LatencyOp = TSchedModel.computeOperandLatency( @@ -374,7 +378,8 @@ MachineCombinerPattern Pattern, bool SlackIsAccurate) { // Get depth and latency of NewRoot and Root. - unsigned NewRootDepth = getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace); + unsigned NewRootDepth = + getDepth(InsInstrs, InstrIdxForVirtReg, BlockTrace, *MBB); unsigned RootDepth = BlockTrace.getInstrCycles(*Root).Depth; LLVM_DEBUG(dbgs() << " Dependence data for " << *Root << "\tNewRootDepth: " @@ -574,7 +579,7 @@ // Check if the block is in a loop. const MachineLoop *ML = MLI->getLoopFor(MBB); if (!TraceEnsemble) - TraceEnsemble = Traces->getEnsemble(MachineTraceStrategy::TS_MinInstrCount); + TraceEnsemble = Traces->getEnsemble(TII->getMachineCombinerTraceStrategy()); SparseSet RegUnits; RegUnits.setUniverse(TRI->getNumRegUnits()); diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/StackMaps.h" @@ -1049,6 +1050,10 @@ reassociateOps(Root, *Prev, Pattern, InsInstrs, DelInstrs, InstIdxForVirtReg); } +MachineTraceStrategy TargetInstrInfo::getMachineCombinerTraceStrategy() const { + return MachineTraceStrategy::TS_MinInstrCount; +} + bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric( const MachineInstr &MI) const { const MachineFunction &MF = *MI.getMF(); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -194,6 +194,8 @@ bool useMachineCombiner() const override { return true; } + MachineTraceStrategy getMachineCombinerTraceStrategy() const override; + void setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, MachineInstr &NewMI2) const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/RegisterScavenging.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/MC/MCInstBuilder.h" @@ -44,6 +45,16 @@ "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden, cl::desc("Prefer whole register move for vector registers.")); +static cl::opt ForceMachineCombinerStrategy( + "riscv-force-machine-combiner-strategy", cl::Hidden, + cl::desc("Force machine combiner to use a specific strategy for machine " + "trace metrics evaluation."), + cl::init(MachineTraceStrategy::TS_NumStrategies), + cl::values(clEnumValN(MachineTraceStrategy::TS_Local, "local", + "Local strategy."), + clEnumValN(MachineTraceStrategy::TS_MinInstrCount, "min-instr", + "MinInstrCount strategy."))); + namespace llvm::RISCVVPseudosTable { using namespace RISCV; @@ -1257,6 +1268,20 @@ return std::nullopt; } +MachineTraceStrategy RISCVInstrInfo::getMachineCombinerTraceStrategy() const { + if (ForceMachineCombinerStrategy.getNumOccurrences() == 0) { + // The option is unused. Choose Local strategy only for in-order cores. When + // scheduling model is unspecified, use MinInstrCount strategy as more + // generic one. + const auto &SchedModel = STI.getSchedModel(); + return (!SchedModel.hasInstrSchedModel() || SchedModel.isOutOfOrder()) + ? MachineTraceStrategy::TS_MinInstrCount + : MachineTraceStrategy::TS_Local; + } + // The strategy was forced by the option. + return ForceMachineCombinerStrategy; +} + void RISCVInstrInfo::setSpecialOperandAttr(MachineInstr &OldMI1, MachineInstr &OldMI2, MachineInstr &NewMI1, diff --git a/llvm/test/CodeGen/RISCV/machine-combiner-strategies.ll b/llvm/test/CodeGen/RISCV/machine-combiner-strategies.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/machine-combiner-strategies.ll @@ -0,0 +1,86 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mcpu=syntacore-scr1-max \ +; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=local < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK_SCR1,CHECK_LOCAL_SCR1 + +; RUN: llc -mtriple=riscv32 -verify-machineinstrs -mcpu=syntacore-scr1-max \ +; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=min-instr < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK_SCR1,CHECK_GLOBAL_SCR1 + +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mcpu=sifive-u74 \ +; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=local < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK_SIFIVE_U74,CHECK_LOCAL_SIFIVE_U74 + +; RUN: llc -mtriple=riscv64 -verify-machineinstrs -mcpu=sifive-u74 \ +; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=min-instr < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK_SIFIVE_U74,CHECK_GLOBAL_SIFIVE_U74 + +define i32 @test_local_strategy(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5) { +; CHECK_LOCAL_SCR1-LABEL: test_local_strategy: +; CHECK_LOCAL_SCR1: # %bb.0: # %entry +; CHECK_LOCAL_SCR1-NEXT: div a0, a0, a1 +; CHECK_LOCAL_SCR1-NEXT: sub a0, a0, a2 +; CHECK_LOCAL_SCR1-NEXT: beqz a0, .LBB0_2 +; CHECK_LOCAL_SCR1-NEXT: # %bb.1: # %b2 +; CHECK_LOCAL_SCR1-NEXT: ret +; CHECK_LOCAL_SCR1-NEXT: .LBB0_2: # %b1 +; CHECK_LOCAL_SCR1-NEXT: add a3, a3, a4 +; CHECK_LOCAL_SCR1-NEXT: add a0, a0, a5 +; CHECK_LOCAL_SCR1-NEXT: add a0, a0, a3 +; CHECK_LOCAL_SCR1-NEXT: ret +; +; CHECK_GLOBAL_SCR1-LABEL: test_local_strategy: +; CHECK_GLOBAL_SCR1: # %bb.0: # %entry +; CHECK_GLOBAL_SCR1-NEXT: div a0, a0, a1 +; CHECK_GLOBAL_SCR1-NEXT: sub a0, a0, a2 +; CHECK_GLOBAL_SCR1-NEXT: beqz a0, .LBB0_2 +; CHECK_GLOBAL_SCR1-NEXT: # %bb.1: # %b2 +; CHECK_GLOBAL_SCR1-NEXT: ret +; CHECK_GLOBAL_SCR1-NEXT: .LBB0_2: # %b1 +; CHECK_GLOBAL_SCR1-NEXT: add a3, a3, a4 +; CHECK_GLOBAL_SCR1-NEXT: add a3, a3, a5 +; CHECK_GLOBAL_SCR1-NEXT: add a0, a0, a3 +; CHECK_GLOBAL_SCR1-NEXT: ret +; +; CHECK_LOCAL_SIFIVE_U74-LABEL: test_local_strategy: +; CHECK_LOCAL_SIFIVE_U74: # %bb.0: # %entry +; CHECK_LOCAL_SIFIVE_U74-NEXT: divw a0, a0, a1 +; CHECK_LOCAL_SIFIVE_U74-NEXT: subw a0, a0, a2 +; CHECK_LOCAL_SIFIVE_U74-NEXT: beqz a0, .LBB0_2 +; CHECK_LOCAL_SIFIVE_U74-NEXT: # %bb.1: # %b2 +; CHECK_LOCAL_SIFIVE_U74-NEXT: ret +; CHECK_LOCAL_SIFIVE_U74-NEXT: .LBB0_2: # %b1 +; CHECK_LOCAL_SIFIVE_U74-NEXT: add a3, a3, a4 +; CHECK_LOCAL_SIFIVE_U74-NEXT: add a0, a0, a5 +; CHECK_LOCAL_SIFIVE_U74-NEXT: addw a0, a0, a3 +; CHECK_LOCAL_SIFIVE_U74-NEXT: ret +; +; CHECK_GLOBAL_SIFIVE_U74-LABEL: test_local_strategy: +; CHECK_GLOBAL_SIFIVE_U74: # %bb.0: # %entry +; CHECK_GLOBAL_SIFIVE_U74-NEXT: divw a0, a0, a1 +; CHECK_GLOBAL_SIFIVE_U74-NEXT: subw a0, a0, a2 +; CHECK_GLOBAL_SIFIVE_U74-NEXT: beqz a0, .LBB0_2 +; CHECK_GLOBAL_SIFIVE_U74-NEXT: # %bb.1: # %b2 +; CHECK_GLOBAL_SIFIVE_U74-NEXT: ret +; CHECK_GLOBAL_SIFIVE_U74-NEXT: .LBB0_2: # %b1 +; CHECK_GLOBAL_SIFIVE_U74-NEXT: add a3, a3, a4 +; CHECK_GLOBAL_SIFIVE_U74-NEXT: add a3, a3, a5 +; CHECK_GLOBAL_SIFIVE_U74-NEXT: addw a0, a0, a3 +; CHECK_GLOBAL_SIFIVE_U74-NEXT: ret +entry: + %div = sdiv i32 %a0, %a1 + %sub0 = sub i32 %div, %a2 + %cmp = icmp eq i32 %sub0, 0 + br i1 %cmp, label %b1, label %b2 +b1: + %sub1 = add i32 %a3, %a4 + %sub2 = add i32 %a5, %sub1 + %sub3 = add i32 %sub2, %sub0 + ret i32 %sub3 +b2: + ret i32 %sub0 +} + +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK_SCR1: {{.*}} +; CHECK_SIFIVE_U74: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll --- a/llvm/test/CodeGen/RISCV/machine-combiner.ll +++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll @@ -1,7 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv64 -mattr=+d,+zbb,+zfh -verify-machineinstrs -mcpu=sifive-u74 \ -; RUN: -O1 -riscv-enable-machine-combiner=true < %s | \ -; RUN: FileCheck %s +; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=local < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK_LOCAL + +; RUN: llc -mtriple=riscv64 -mattr=+d,+zbb,+zfh -verify-machineinstrs -mcpu=sifive-u74 \ +; RUN: -O1 -riscv-enable-machine-combiner=true -riscv-force-machine-combiner-strategy=min-instr < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK_GLOBAL define double @test_reassoc_fadd1(double %a0, double %a1, double %a2, double %a3) { ; CHECK-LABEL: test_reassoc_fadd1: @@ -1088,3 +1092,43 @@ declare half @llvm.maxnum.f16(half, half) declare float @llvm.maxnum.f32(float, float) declare double @llvm.maxnum.f64(double, double) + +define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a3, i64 %flag) { +; CHECK_LOCAL-LABEL: test_fmadd_strategy: +; CHECK_LOCAL: # %bb.0: # %entry +; CHECK_LOCAL-NEXT: fmv.d ft0, fa0 +; CHECK_LOCAL-NEXT: fsub.d ft1, fa0, fa1 +; CHECK_LOCAL-NEXT: fmul.d fa0, ft1, fa2 +; CHECK_LOCAL-NEXT: andi a0, a0, 1 +; CHECK_LOCAL-NEXT: beqz a0, .LBB76_2 +; CHECK_LOCAL-NEXT: # %bb.1: # %entry +; CHECK_LOCAL-NEXT: fmul.d ft1, ft0, fa1 +; CHECK_LOCAL-NEXT: fmadd.d ft0, ft0, fa1, fa0 +; CHECK_LOCAL-NEXT: fsub.d fa0, ft0, ft1 +; CHECK_LOCAL-NEXT: .LBB76_2: # %entry +; CHECK_LOCAL-NEXT: ret +; +; CHECK_GLOBAL-LABEL: test_fmadd_strategy: +; CHECK_GLOBAL: # %bb.0: # %entry +; CHECK_GLOBAL-NEXT: fmv.d ft0, fa0 +; CHECK_GLOBAL-NEXT: fsub.d ft1, fa0, fa1 +; CHECK_GLOBAL-NEXT: fmul.d fa0, ft1, fa2 +; CHECK_GLOBAL-NEXT: andi a0, a0, 1 +; CHECK_GLOBAL-NEXT: beqz a0, .LBB76_2 +; CHECK_GLOBAL-NEXT: # %bb.1: # %entry +; CHECK_GLOBAL-NEXT: fmul.d ft0, ft0, fa1 +; CHECK_GLOBAL-NEXT: fadd.d ft1, ft0, fa0 +; CHECK_GLOBAL-NEXT: fsub.d fa0, ft1, ft0 +; CHECK_GLOBAL-NEXT: .LBB76_2: # %entry +; CHECK_GLOBAL-NEXT: ret +entry: + %sub = fsub contract double %a0, %a1 + %mul = fmul contract double %sub, %a2 + %and = and i64 %flag, 1 + %tobool.not = icmp eq i64 %and, 0 + %mul2 = fmul contract double %a0, %a1 + %add = fadd contract double %mul2, %mul + %sub3 = fsub contract double %add, %mul2 + %retval.0 = select i1 %tobool.not, double %mul, double %sub3 + ret double %retval.0 +}