diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -52,6 +52,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; @@ -69,6 +70,7 @@ const SIRegisterInfo *TRI = nullptr; const SIInstrInfo *TII = nullptr; LiveIntervals *LIS = nullptr; + MachineDominatorTree *MDT = nullptr; MachineRegisterInfo *MRI = nullptr; SetVector LoweredEndCf; DenseSet LoweredIf; @@ -141,6 +143,7 @@ void getAnalysisUsage(AnalysisUsage &AU) const override { // Should preserve the same set that TwoAddressInstructions does. + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreservedID(LiveVariablesID); @@ -471,6 +474,14 @@ MachineBasicBlock *SplitBB = &MBB; if (NeedBlockSplit) { SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/true, LIS); + if (MDT && SplitBB != &MBB) { + MachineDomTreeNode *MBBNode = (*MDT)[&MBB]; + SmallVector Children(MBBNode->begin(), + MBBNode->end()); + MachineDomTreeNode *SplitBBNode = MDT->addNewBlock(SplitBB, &MBB); + for (MachineDomTreeNode *Child : Children) + MDT->changeImmediateDominator(Child, SplitBBNode); + } Opcode = OrTermrOpc; InsPt = MI; } @@ -757,6 +768,8 @@ for (auto &I : MBB.instrs()) LIS->RemoveMachineInstrFromMaps(I); } + if (MDT) + MDT->eraseNode(&MBB); MBB.clear(); MBB.eraseFromParent(); if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) { @@ -780,6 +793,7 @@ // This doesn't actually need LiveIntervals, but we can preserve them. LIS = getAnalysisIfAvailable(); + MDT = getAnalysisIfAvailable(); MRI = &MF.getRegInfo(); BoolRC = TRI->getBoolRC(); diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -316,7 +316,6 @@ ; GCN-O1-NEXT: Eliminate PHI nodes for register allocation ; GCN-O1-NEXT: SI Lower control flow pseudo instructions ; GCN-O1-NEXT: Two-Address instruction pass -; GCN-O1-NEXT: MachineDominator Tree Construction ; GCN-O1-NEXT: Slot index numbering ; GCN-O1-NEXT: Live Interval Analysis ; GCN-O1-NEXT: Machine Natural Loop Construction @@ -600,7 +599,6 @@ ; GCN-O1-OPTS-NEXT: Eliminate PHI nodes for register allocation ; GCN-O1-OPTS-NEXT: SI Lower control flow pseudo instructions ; GCN-O1-OPTS-NEXT: Two-Address instruction pass -; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction ; GCN-O1-OPTS-NEXT: Slot index numbering ; GCN-O1-OPTS-NEXT: Live Interval Analysis ; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction @@ -886,7 +884,6 @@ ; GCN-O2-NEXT: Eliminate PHI nodes for register allocation ; GCN-O2-NEXT: SI Lower control flow pseudo instructions ; GCN-O2-NEXT: Two-Address instruction pass -; GCN-O2-NEXT: MachineDominator Tree Construction ; GCN-O2-NEXT: Slot index numbering ; GCN-O2-NEXT: Live Interval Analysis ; GCN-O2-NEXT: Machine Natural Loop Construction @@ -1186,7 +1183,6 @@ ; GCN-O3-NEXT: Eliminate PHI nodes for register allocation ; GCN-O3-NEXT: SI Lower control flow pseudo instructions ; GCN-O3-NEXT: Two-Address instruction pass -; GCN-O3-NEXT: MachineDominator Tree Construction ; GCN-O3-NEXT: Slot index numbering ; GCN-O3-NEXT: Live Interval Analysis ; GCN-O3-NEXT: Machine Natural Loop Construction