Index: llvm/lib/CodeGen/MachineBlockPlacement.cpp =================================================================== --- llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -513,6 +513,11 @@ bool runOnMachineFunction(MachineFunction &F) override; + bool allowTailDupPlacement() const { + assert(F); + return TailDupPlacement && !F->getTarget().requiresStructuredCFG(); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.addRequired(); @@ -1018,7 +1023,7 @@ MachineBasicBlock *Succ1 = BestA.Dest; MachineBasicBlock *Succ2 = BestB.Dest; // Check to see if tail-duplication would be profitable. - if (TailDupPlacement && shouldTailDuplicate(Succ2) && + if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) && canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) && isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1), Chain, BlockFilter)) { @@ -1044,7 +1049,7 @@ return Result; } -/// When the option TailDupPlacement is on, this method checks if the +/// When the option allowTailDupPlacement() is on, this method checks if the /// fallthrough candidate block \p Succ (of block \p BB) can be tail-duplicated /// into all of its unplaced, unfiltered predecessors, that are not BB. bool MachineBlockPlacement::canTailDuplicateUnplacedPreds( @@ -1493,7 +1498,7 @@ if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb, Chain, BlockFilter)) { // If tail duplication would make Succ profitable, place it. - if (TailDupPlacement && shouldTailDuplicate(Succ)) + if (allowTailDupPlacement() && shouldTailDuplicate(Succ)) DupCandidates.push_back(std::make_tuple(SuccProb, Succ)); continue; } @@ -1702,7 +1707,7 @@ auto Result = selectBestSuccessor(BB, Chain, BlockFilter); MachineBasicBlock* BestSucc = Result.BB; bool ShouldTailDup = Result.ShouldTailDup; - if (TailDupPlacement) + if (allowTailDupPlacement()) ShouldTailDup |= (BestSucc && shouldTailDuplicate(BestSucc)); // If an immediate successor isn't available, look for the best viable @@ -1724,7 +1729,7 @@ // Placement may have changed tail duplication opportunities. // Check for that now. - if (TailDupPlacement && BestSucc && ShouldTailDup) { + if (allowTailDupPlacement() && BestSucc && ShouldTailDup) { // If the chosen successor was duplicated into all its predecessors, // don't bother laying it out, just go round the loop again with BB as // the chain end. @@ -2758,7 +2763,7 @@ TailDupSize = TailDupPlacementAggressiveThreshold; } - if (TailDupPlacement) { + if (allowTailDupPlacement()) { MPDT = &getAnalysis(); if (MF.getFunction().optForSize()) TailDupSize = 1; Index: llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp =================================================================== --- llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -44,6 +44,14 @@ cl::desc("Disable load/store vectorizer"), cl::init(false), cl::Hidden); +// TODO: Remove this flag when we are confident with no regressions. +static cl::opt DisableRequireStructuredCFG( + "disable-nvptx-require-structured-cfg", + cl::desc("Transitional flag to turn ofo NVPTX's requirement on preserving " + "structured CFG. The requirement should be disabled only when " + "unexpected regressions happen."), + cl::init(false), cl::Hidden); + namespace llvm { void initializeNVVMIntrRangePass(PassRegistry&); @@ -108,6 +116,8 @@ drvInterface = NVPTX::NVCL; else drvInterface = NVPTX::CUDA; + if (!DisableRequireStructuredCFG) + setRequiresStructuredCFG(true); initAsmInfo(); }