Index: include/llvm/CodeGen/MachineInstr.h
===================================================================
--- include/llvm/CodeGen/MachineInstr.h
+++ include/llvm/CodeGen/MachineInstr.h
@@ -356,6 +356,7 @@
   /// memory access done by this instruction. If this is true, calling code
   /// must be conservative.
   bool memoperands_empty() const { return NumMemRefs == 0; }
+  unsigned getNumMemOperands() const { return NumMemRefs; }
 
   iterator_range<mmo_iterator> memoperands() {
     return make_range(memoperands_begin(), memoperands_end());
Index: lib/CodeGen/BranchFolding.cpp
===================================================================
--- lib/CodeGen/BranchFolding.cpp
+++ lib/CodeGen/BranchFolding.cpp
@@ -65,6 +65,13 @@
           cl::desc("Min number of instructions to consider tail merging"),
                               cl::init(3), cl::Hidden);
 
+// Upper bound on (#MMOs in MBB) * (#MMOs in MBBCommon) for merging MMOs during
+// tail merging. Must be <= 255 because MachineInstr::NumMemRefs is a uint8_t.
+static cl::opt<unsigned>
+    MergeMMOsThreshold("merge-mmos-threshold",
+                       cl::desc("Threshold for mergeMMOs function"),
+                       cl::init(16), cl::Hidden);
+
 namespace {
   /// BranchFolderPass - Wrap branch folder in a machine function pass.
   class BranchFolderPass : public MachineFunctionPass {
@@ -744,24 +751,50 @@
   return true;
 }
 
-static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) {
+// Add MI1's MMOs to MI2's MMOs while excluding any duplicates. The MI scheduler
+// currently doesn't handle multiple MMOs, so duplicates would likely pessimize
+// the scheduler.
+//
+// Both instructions must already have at least one MMO. An instruction without
+// MMOs has unknown memory behavior, so the merged instruction must not carry
+// any either; the caller checks this together with the merge threshold.
+static void mergeMMOs(MachineInstr *MI1, MachineInstr *MI2) {
   auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end();
   auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end();
-  if ((E1 - I1) != (E2 - I2))
-    return false;
-  for (; I1 != E1; ++I1, ++I2) {
-    if (**I1 != **I2)
-      return false;
+  MachineFunction *MF = MI1->getParent()->getParent();
+
+  // MMO counts are small (the caller bounds their product), so a linear scan
+  // is used instead of a SmallSet.
+  for (; I1 != E1; ++I1) {
+    bool IsDupMMO = false;
+    for (I2 = MI2->memoperands_begin(); I2 != E2; ++I2) {
+      if (**I1 == **I2) {
+        IsDupMMO = true;
+        break;
+      }
+    }
+    if (!IsDupMMO) {
+      MI2->addMemOperand(*MF, *I1);
+      // MI2 gained an MMO; re-read the end of its MMO range.
+      E2 = MI2->memoperands_end();
+    }
   }
-  return true;
 }
 
 static void
-removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
-                               MachineBasicBlock &MBBCommon) {
-  // Remove MMOs from memory operations in the common block
-  // when they do not match the ones from the block being tail-merged.
-  // This ensures later passes conservatively compute dependencies.
+removeOrMergeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
+                                      MachineBasicBlock &MBBCommon) {
+  // For every memory operation in the common tail: if the instruction in MBB
+  // and its counterpart in MBBCommon both have MMOs, and the product of their
+  // MMO counts is smaller than the threshold, merge MBB's MMOs into the common
+  // instruction. Otherwise, remove the common instruction's MMOs so that later
+  // passes conservatively compute dependencies.
+  //
+  // NumMemRefs is a uint8_t, so clamp the threshold to 255 regardless of the
+  // command-line value; this keeps the merged MMO count representable.
+  unsigned MergeThreshold = MergeMMOsThreshold;
+  if (MergeThreshold > 255)
+    MergeThreshold = 255;
   MachineBasicBlock *MBB = MBBIStartPos->getParent();
   // Note CommonTailLen does not necessarily matches the size of
   // the common BB nor all its instructions because of debug
@@ -791,9 +824,18 @@
            "Reached BB end within common tail length!");
     assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!");
 
-    if (MBBICommon->mayLoad() || MBBICommon->mayStore())
-      if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon))
+    if (MBBICommon->mayLoad() || MBBICommon->mayStore()) {
+      // An instruction without MMOs must be treated conservatively. If either
+      // side has none, the merged instruction must have none as well, so drop
+      // the MMOs instead of merging.
+      unsigned NumCommonMMOs = MBBICommon->getNumMemOperands();
+      unsigned NumMMOs = MBBI->getNumMemOperands();
+      if (NumCommonMMOs != 0 && NumMMOs != 0 &&
+          NumCommonMMOs * NumMMOs < MergeThreshold)
+        mergeMMOs(&*MBBI, &*MBBICommon);
+      else
         MBBICommon->dropMemRefs();
+    }
 
     ++MBBI;
     ++MBBICommon;
@@ -913,8 +955,9 @@
         continue;
       DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
                    << (i == e-1 ? "" : ", "));
-      // Remove MMOs from memory operations as needed.
-      removeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(), *MBB);
+      // Remove or merge MMOs from memory operations as needed.
+      removeOrMergeMMOsFromMemoryOperations(SameTails[i].getTailStartPos(),
+                                            *MBB);
       // Hack the end off BB i, making it jump to BB commonTailIndex instead.
       ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
       // BB i is no longer a predecessor of SuccBB; remove it from the worklist.