Index: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp =================================================================== --- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp +++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp @@ -2946,42 +2946,8 @@ BasicBlock *QTB, BasicBlock *QFB, BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond, - const DataLayout &DL) { - auto IsaBitcastOfPointerType = [](const Instruction &I) { - return Operator::getOpcode(&I) == Instruction::BitCast && - I.getType()->isPointerTy(); - }; - - // If we're not in aggressive mode, we only optimize if we have some - // confidence that by optimizing we'll allow P and/or Q to be if-converted. - auto IsWorthwhile = [&](BasicBlock *BB) { - if (!BB) - return true; - // Heuristic: if the block can be if-converted/phi-folded and the - // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to - // thread this store. - unsigned N = 0; - for (auto &I : BB->instructionsWithoutDebug()) { - // Cheap instructions viable for folding. - if (isa(I) || isa(I) || - isa(I)) - ++N; - // Free instructions. - else if (I.isTerminator() || IsaBitcastOfPointerType(I)) - continue; - else - return false; - } - // The store we want to merge is counted in N, so add 1 to make sure - // we're counting the instructions that would be left. - return N <= (PHINodeFoldingThreshold + 1); - }; - - if (!MergeCondStoresAggressively && - (!IsWorthwhile(PTB) || !IsWorthwhile(PFB) || !IsWorthwhile(QTB) || - !IsWorthwhile(QFB))) - return false; - + const DataLayout &DL, + const TargetTransformInfo &TTI) { // For every pointer, there must be exactly two stores, one coming from // PTB or PFB, and the other from QTB or QFB. We don't support more than one // store (to any address) in PTB,PFB or QTB,QFB. @@ -3022,6 +2988,46 @@ if (&*I != PStore && I->mayReadOrWriteMemory()) return false; + // If we're not in aggressive mode, we only optimize if we have some + // confidence that by optimizing we'll allow P and/or Q to be if-converted. + auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef FreeStores) { + if (!BB) + return true; + // Heuristic: if the block can be if-converted/phi-folded and the + // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to + // thread this store. + int BudgetRemaining = + PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic; + for (auto &I : BB->instructionsWithoutDebug()) { + // Consider terminator instruction to be free. + if (I.isTerminator()) + continue; + // If this is one the stores that we want to speculate out of this BB, + // then don't count it's cost, consider it to be free. + if (auto *S = dyn_cast(&I)) + if (llvm::find(FreeStores, S)) + continue; + // Else, we have a white-list of instructions that we are ak speculating. + if (!isa(I) && !isa(I)) + return false; // Not in white-list - not worthwhile folding. + // And finally, if this is a non-free instruction that we are okay + // speculating, ensure that we consider the speculation budget. + BudgetRemaining -= TTI.getUserCost(&I); + if (BudgetRemaining < 0) + return false; // Eagerly refuse to fold as soon as we're out of budget. + } + assert(BudgetRemaining >= 0 && + "When we run out of budget we will eagerly return from within the " + "per-instruction loop."); + return true; + }; + + ArrayRef FreeStores = {PStore, QStore}; + if (!MergeCondStoresAggressively && + (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) || + !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores))) + return false; + // If PostBB has more than two predecessors, we need to split it so we can // sink the store. if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) { @@ -3099,7 +3105,8 @@ } static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI, - const DataLayout &DL) { + const DataLayout &DL, + const TargetTransformInfo &TTI) { // The intention here is to find diamonds or triangles (see below) where each // conditional block contains a store to the same address. Both of these // stores are conditional, so they can't be unconditionally sunk. But it may @@ -3201,7 +3208,7 @@ bool Changed = false; for (auto *Address : CommonAddresses) Changed |= mergeConditionalStoreToAddress( - PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL); + PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL, TTI); return Changed; } @@ -3210,7 +3217,8 @@ /// that PBI and BI are both conditional branches, and BI is in one of the /// successor blocks of PBI - PBI branches to BI. static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI, - const DataLayout &DL) { + const DataLayout &DL, + const TargetTransformInfo &TTI) { assert(PBI->isConditional() && BI->isConditional()); BasicBlock *BB = BI->getParent(); @@ -3266,7 +3274,7 @@ // If both branches are conditional and both contain stores to the same // address, remove the stores from the conditionals and create a conditional // merged store at the end. - if (MergeCondStores && mergeConditionalStores(PBI, BI, DL)) + if (MergeCondStores && mergeConditionalStores(PBI, BI, DL, TTI)) return true; // If this is a conditional branch in an empty block, and if any @@ -5938,7 +5946,7 @@ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) if (BranchInst *PBI = dyn_cast((*PI)->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (SimplifyCondBranchToCondBranch(PBI, BI, DL)) + if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI)) return requestResimplify(); // Look for diamond patterns. @@ -5946,7 +5954,7 @@ if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB)) if (BranchInst *PBI = dyn_cast(PrevBB->getTerminator())) if (PBI != BI && PBI->isConditional()) - if (mergeConditionalStores(PBI, BI, DL)) + if (mergeConditionalStores(PBI, BI, DL, TTI)) return requestResimplify(); return false; Index: llvm/trunk/test/Transforms/SimplifyCFG/X86/merge-cond-stores-cost.ll =================================================================== --- llvm/trunk/test/Transforms/SimplifyCFG/X86/merge-cond-stores-cost.ll +++ llvm/trunk/test/Transforms/SimplifyCFG/X86/merge-cond-stores-cost.ll @@ -8,24 +8,16 @@ ; CHECK-NEXT: br i1 [[X1]], label [[FALLTHROUGH:%.*]], label [[YES1:%.*]] ; CHECK: yes1: ; CHECK-NEXT: [[VAL0:%.*]] = sdiv i32 [[D:%.*]], [[C:%.*]] +; CHECK-NEXT: store i32 [[VAL0]], i32* [[P:%.*]] ; CHECK-NEXT: br label [[FALLTHROUGH]] ; CHECK: fallthrough: -; CHECK-NEXT: [[SIMPLIFYCFG_MERGE:%.*]] = phi i32 [ [[VAL0]], [[YES1]] ], [ undef, [[ENTRY:%.*]] ] ; CHECK-NEXT: [[X2:%.*]] = icmp eq i32 [[B:%.*]], 0 ; CHECK-NEXT: br i1 [[X2]], label [[END:%.*]], label [[YES2:%.*]] ; CHECK: yes2: ; CHECK-NEXT: [[VAL1:%.*]] = sdiv i32 [[C]], [[D]] +; CHECK-NEXT: store i32 [[VAL1]], i32* [[P]] ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[SIMPLIFYCFG_MERGE1:%.*]] = phi i32 [ [[VAL1]], [[YES2]] ], [ [[SIMPLIFYCFG_MERGE]], [[FALLTHROUGH]] ] -; CHECK-NEXT: [[TMP0:%.*]] = xor i1 [[X1]], true -; CHECK-NEXT: [[TMP1:%.*]] = xor i1 [[X2]], true -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] -; CHECK-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; CHECK: 3: -; CHECK-NEXT: store i32 [[SIMPLIFYCFG_MERGE1]], i32* [[P:%.*]], align 4 -; CHECK-NEXT: br label [[TMP4]] -; CHECK: 4: ; CHECK-NEXT: ret void ; entry: