Index: llvm/lib/CodeGen/MachineCSE.cpp
===================================================================
--- llvm/lib/CodeGen/MachineCSE.cpp
+++ llvm/lib/CodeGen/MachineCSE.cpp
@@ -21,6 +21,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -66,6 +67,7 @@
     AliasAnalysis *AA;
     MachineDominatorTree *DT;
     MachineRegisterInfo *MRI;
+    MachineBlockFrequencyInfo *MBFI;
 
   public:
     static char ID; // Pass identification
@@ -83,6 +85,8 @@
       AU.addPreservedID(MachineLoopInfoID);
       AU.addRequired<MachineDominatorTree>();
       AU.addPreserved<MachineDominatorTree>();
+      AU.addRequired<MachineBlockFrequencyInfo>();
+      AU.addPreserved<MachineBlockFrequencyInfo>();
     }
 
     void releaseMemory() override {
@@ -133,6 +137,11 @@
     bool isPRECandidate(MachineInstr *MI);
     bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
     bool PerformSimplePRE(MachineDominatorTree *DT);
+    /// Returns true if CandidateBB is no hotter than MBB and MBB1 combined,
+    /// i.e. hoisting their common computations into it is likely beneficial.
+    bool isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
+                                MachineBasicBlock *MBB,
+                                MachineBasicBlock *MBB1);
   };
 
 } // end anonymous namespace
@@ -802,6 +811,9 @@
     if (!CMBB->isLegalToHoistInto())
       continue;
 
+    if (!isBeneficalToHoistInto(CMBB, MBB, MBB1))
+      continue;
+
     // Two instrs are partial redundant if their basic blocks are reachable
     // from one to another but one doesn't dominate another.
     if (CMBB != MBB1) {
@@ -854,6 +866,17 @@
   return Changed;
 }
 
+bool MachineCSE::isBeneficalToHoistInto(MachineBasicBlock *CandidateBB,
+                                        MachineBasicBlock *MBB,
+                                        MachineBasicBlock *MBB1) {
+  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
+  assert(DT->dominates(CandidateBB, MBB1) &&
+         "CandidateBB should dominate MBB1");
+  // FIXME: Would using '<' here help to decrease register pressure?
+  return MBFI->getBlockFreq(CandidateBB) <=
+         MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
+}
+
 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
@@ -863,6 +886,7 @@
   MRI = &MF.getRegInfo();
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   DT = &getAnalysis<MachineDominatorTree>();
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
   LookAheadLimit = TII->getMachineCSELookAheadLimit();
   bool ChangedPRE, ChangedCSE;
   ChangedPRE = PerformSimplePRE(DT);
Index: llvm/test/CodeGen/AArch64/O3-pipeline.ll
===================================================================
--- llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -33,7 +33,7 @@
 ; CHECK-NEXT: Induction Variable Users
 ; CHECK-NEXT: Loop Strength Reduction
 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
-; CHECK-NEXT: Function Alias Analysis Results
+; CHECK-NEXT: Function Alias Analysis Results
 ; CHECK-NEXT: Merge contiguous icmps into a memcmp
 ; CHECK-NEXT: Expand memcmp() to load/stores
 ; CHECK-NEXT: Lower Garbage Collection Instructions
@@ -97,9 +97,9 @@
 ; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: Machine Natural Loop Construction
 ; CHECK-NEXT: Early Machine Loop Invariant Code Motion
+; CHECK-NEXT: Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Common Subexpression Elimination
 ; CHECK-NEXT: MachinePostDominator Tree Construction
-; CHECK-NEXT: Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine code sinking
 ; CHECK-NEXT: Peephole Optimizations
 ; CHECK-NEXT: Remove dead machine instructions
Index: llvm/test/CodeGen/ARM/O3-pipeline.ll
===================================================================
--- llvm/test/CodeGen/ARM/O3-pipeline.ll
+++ llvm/test/CodeGen/ARM/O3-pipeline.ll
@@ -72,9 +72,9 @@
 ; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: Machine Natural Loop Construction
 ; CHECK-NEXT: Early Machine Loop Invariant Code Motion
+; CHECK-NEXT: Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Common Subexpression Elimination
 ; CHECK-NEXT: MachinePostDominator Tree Construction
-; CHECK-NEXT: Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine code sinking
 ; CHECK-NEXT: Peephole Optimizations
 ; CHECK-NEXT: Remove dead machine instructions
Index: llvm/test/CodeGen/PowerPC/machine-pre.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/machine-pre.ll
+++ llvm/test/CodeGen/PowerPC/machine-pre.ll
@@ -8,25 +8,25 @@
 ; CHECK-P9: # %bb.0: # %entry
 ; CHECK-P9-NEXT: lis r7, 0
 ; CHECK-P9-NEXT: li r6, 0
+; CHECK-P9-NEXT: li r8, 0
 ; CHECK-P9-NEXT: li r9, 0
-; CHECK-P9-NEXT: li r10, 0
 ; CHECK-P9-NEXT: ori r7, r7, 65535
 ; CHECK-P9-NEXT: .p2align 5
 ; CHECK-P9-NEXT: .LBB0_1: # %header
 ; CHECK-P9-NEXT: #
-; CHECK-P9-NEXT: addi r10, r10, 1
-; CHECK-P9-NEXT: cmpw r10, r3
-; CHECK-P9-NEXT: addi r8, r5, 1024
+; CHECK-P9-NEXT: addi r9, r9, 1
+; CHECK-P9-NEXT: cmpw r9, r3
 ; CHECK-P9-NEXT: blt cr0, .LBB0_4
 ; CHECK-P9-NEXT: # %bb.2: # %cont
 ; CHECK-P9-NEXT: #
-; CHECK-P9-NEXT: add r9, r9, r4
-; CHECK-P9-NEXT: cmpw r9, r7
+; CHECK-P9-NEXT: add r8, r8, r4
+; CHECK-P9-NEXT: cmpw r8, r7
 ; CHECK-P9-NEXT: bgt cr0, .LBB0_1
 ; CHECK-P9-NEXT: # %bb.3: # %cont.1
-; CHECK-P9-NEXT: mr r6, r8
+; CHECK-P9-NEXT: addi r6, r5, 1024
 ; CHECK-P9-NEXT: .LBB0_4: # %return
-; CHECK-P9-NEXT: mullw r3, r6, r8
+; CHECK-P9-NEXT: addi r3, r5, 1024
+; CHECK-P9-NEXT: mullw r3, r6, r3
 ; CHECK-P9-NEXT: blr
 entry:
   br label %header
Index: llvm/test/CodeGen/X86/O3-pipeline.ll
===================================================================
--- llvm/test/CodeGen/X86/O3-pipeline.ll
+++ llvm/test/CodeGen/X86/O3-pipeline.ll
@@ -30,7 +30,7 @@
 ; CHECK-NEXT: Induction Variable Users
 ; CHECK-NEXT: Loop Strength Reduction
 ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
-; CHECK-NEXT: Function Alias Analysis Results
+; CHECK-NEXT: Function Alias Analysis Results
 ; CHECK-NEXT: Merge contiguous icmps into a memcmp
 ; CHECK-NEXT: Expand memcmp() to load/stores
 ; CHECK-NEXT: Lower Garbage Collection Instructions
@@ -67,7 +67,7 @@
 ; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: Local Dynamic TLS Access Clean-up
 ; CHECK-NEXT: X86 PIC Global Base Reg Initialization
-; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
+; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
 ; CHECK-NEXT: X86 Domain Reassignment Pass
 ; CHECK-NEXT: Early Tail Duplication
 ; CHECK-NEXT: Optimize machine instruction PHIs
@@ -84,9 +84,9 @@
 ; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: Machine Natural Loop Construction
 ; CHECK-NEXT: Early Machine Loop Invariant Code Motion
+; CHECK-NEXT: Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine Common Subexpression Elimination
 ; CHECK-NEXT: MachinePostDominator Tree Construction
-; CHECK-NEXT: Machine Block Frequency Analysis
 ; CHECK-NEXT: Machine code sinking
 ; CHECK-NEXT: Peephole Optimizations
 ; CHECK-NEXT: Remove dead machine instructions
@@ -94,7 +94,7 @@
 ; CHECK-NEXT: X86 Fixup SetCC
 ; CHECK-NEXT: X86 LEA Optimize
 ; CHECK-NEXT: X86 Optimize Call Frame
-; CHECK-NEXT: X86 Avoid Store Forwarding Block
+; CHECK-NEXT: X86 Avoid Store Forwarding Blocks
 ; CHECK-NEXT: X86 speculative load hardening
 ; CHECK-NEXT: MachineDominator Tree Construction
 ; CHECK-NEXT: X86 EFLAGS copy lowering
@@ -176,3 +176,4 @@
 define void @f() {
   ret void
 }
+