Index: lib/Transforms/Scalar/LoopUnrollPass.cpp =================================================================== --- lib/Transforms/Scalar/LoopUnrollPass.cpp +++ lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -357,6 +357,11 @@ // post-unrolling. DenseMap SimplifiedValues; + // Similarly, we keep track of all instructions that become dead. + // We don't need to map them to a value, that's why we use Set instead of Map + // here. + SmallPtrSet DeadInstructions; + // To avoid requesting SCEV info on every iteration, request it once, and // for each value that would become ConstAddress+Constant after loop // unrolling, save the corresponding data. @@ -525,6 +530,7 @@ // we literally have to go through all loop's iterations. for (Iteration = 0; Iteration < TripCount; ++Iteration) { SimplifiedValues.clear(); + DeadInstructions.clear(); BBWorklist.clear(); BBWorklist.insert(L->getHeader()); // Note that we *must not* cache the size, this loop grows the worklist. @@ -557,6 +563,25 @@ UnrolledLoopSize = UINT_MAX; return; } + + for (unsigned Idx = BBWorklist.size() - 1; Idx != 0; --Idx) { + BasicBlock *BB = BBWorklist[Idx]; + if (BB->empty()) + continue; + for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E; ++I) { + if (SimplifiedValues.count(&*I)) + continue; + if (DeadInstructions.count(&*I)) + continue; + if (std::all_of(I->user_begin(), I->user_end(), [&](User *U) { + return SimplifiedValues.count(cast(U)) + + DeadInstructions.count(cast(U)); + })) { + NumberOfOptimizedInstructions += TTI.getUserCost(&*I); + DeadInstructions.insert(&*I); + } + } + } } // If we can overflow computing percentage of optimized instructions, just