diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -14527,113 +14527,118 @@ InstSetVector PostProcessInstructions; SmallDenseSet KeyNodes; - for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { - // Skip instructions with scalable type. The num of elements is unknown at - // compile-time for scalable type. - if (isa(it->getType())) - continue; - - // Skip instructions marked for the deletion. - if (R.isDeleted(&*it)) - continue; - // We may go through BB multiple times so skip the one we have checked. - if (!VisitedInstrs.insert(&*it).second) { - if (it->use_empty() && KeyNodes.contains(&*it) && - vectorizeSimpleInstructions(PostProcessInstructions, BB, R, - it->isTerminator())) { - // We would like to start over since some instructions are deleted - // and the iterator may become invalid value. - Changed = true; - it = BB->begin(); - e = BB->end(); - } - continue; - } - - if (isa(it)) - continue; + // This loop helps restart the iteration over the instructions of BB. + bool RestartIterationOverBB = true; + while (RestartIterationOverBB) { + RestartIterationOverBB = false; + + for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) { + // Skip instructions with scalable type. The num of elements is unknown at + // compile-time for scalable type. + if (isa(it->getType())) + continue; - // Try to vectorize reductions that use PHINodes. - if (PHINode *P = dyn_cast(it)) { - // Check that the PHI is a reduction PHI. - if (P->getNumIncomingValues() == 2) { - // Try to match and vectorize a horizontal reduction. - Instruction *Root = getReductionInstr(DT, P, BB, LI); - if (Root && vectorizeRootInstruction(P, Root, BB, R, TTI)) { + // Skip instructions marked for the deletion. + if (R.isDeleted(&*it)) + continue; + // We may go through BB multiple times so skip the one we have checked. + if (!VisitedInstrs.insert(&*it).second) { + if (it->use_empty() && KeyNodes.contains(&*it) && + vectorizeSimpleInstructions(PostProcessInstructions, BB, R, + it->isTerminator())) { + // We would like to start over since some instructions are deleted + // and the iterator may become invalid value. Changed = true; - it = BB->begin(); - e = BB->end(); - continue; + RestartIterationOverBB = true; + break; } + continue; } - // Try to vectorize the incoming values of the PHI, to catch reductions - // that feed into PHIs. - for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; I++) { - // Skip if the incoming block is the current BB for now. Also, bypass - // unreachable IR for efficiency and to avoid crashing. - // TODO: Collect the skipped incoming values and try to vectorize them - // after processing BB. - if (BB == P->getIncomingBlock(I) || - !DT->isReachableFromEntry(P->getIncomingBlock(I))) - continue; - // Postponed instructions should not be vectorized here, delay their - // vectorization. - if (auto *PI = dyn_cast(P->getIncomingValue(I)); - PI && !PostProcessInstructions.contains(PI)) - Changed |= vectorizeRootInstruction(nullptr, PI, - P->getIncomingBlock(I), R, TTI); - } - continue; - } + if (isa(it)) + continue; + + // Try to vectorize reductions that use PHINodes. + if (PHINode *P = dyn_cast(it)) { + // Check that the PHI is a reduction PHI. + if (P->getNumIncomingValues() == 2) { + // Try to match and vectorize a horizontal reduction. + Instruction *Root = getReductionInstr(DT, P, BB, LI); + if (Root && vectorizeRootInstruction(P, Root, BB, R, TTI)) { + Changed = true; + RestartIterationOverBB = true; + break; + } + } + // Try to vectorize the incoming values of the PHI, to catch reductions + // that feed into PHIs. + for (unsigned I = 0, E = P->getNumIncomingValues(); I != E; I++) { + // Skip if the incoming block is the current BB for now. Also, bypass + // unreachable IR for efficiency and to avoid crashing. + // TODO: Collect the skipped incoming values and try to vectorize them + // after processing BB. + if (BB == P->getIncomingBlock(I) || + !DT->isReachableFromEntry(P->getIncomingBlock(I))) + continue; - // Ran into an instruction without users, like terminator, or function call - // with ignored return value, store. Ignore unused instructions (basing on - // instruction type, except for CallInst and InvokeInst). - if (it->use_empty() && - (it->getType()->isVoidTy() || isa(it))) { - KeyNodes.insert(&*it); - bool OpsChanged = false; - auto *SI = dyn_cast(it); - bool TryToVectorizeRoot = ShouldStartVectorizeHorAtStore || !SI; - if (SI) { - auto I = Stores.find(getUnderlyingObject(SI->getPointerOperand())); - // Try to vectorize chain in store, if this is the only store to the - // address in the block. - // TODO: This is just a temporarily solution to save compile time. Need - // to investigate if we can safely turn on slp-vectorize-hor-store - // instead to allow lookup for reduction chains in all non-vectorized - // stores (need to check side effects and compile time). - TryToVectorizeRoot = (I == Stores.end() || I->second.size() == 1) && - SI->getValueOperand()->hasOneUse(); - } - if (TryToVectorizeRoot) { - for (auto *V : it->operand_values()) { // Postponed instructions should not be vectorized here, delay their // vectorization. - if (auto *VI = dyn_cast(V); - VI && !PostProcessInstructions.contains(VI)) - // Try to match and vectorize a horizontal reduction. - OpsChanged |= vectorizeRootInstruction(nullptr, VI, BB, R, TTI); - } - } - // Start vectorization of post-process list of instructions from the - // top-tree instructions to try to vectorize as many instructions as - // possible. - OpsChanged |= vectorizeSimpleInstructions(PostProcessInstructions, BB, R, - it->isTerminator()); - if (OpsChanged) { - // We would like to start over since some instructions are deleted - // and the iterator may become invalid value. - Changed = true; - it = BB->begin(); - e = BB->end(); + if (auto *PI = dyn_cast(P->getIncomingValue(I)); + PI && !PostProcessInstructions.contains(PI)) + Changed |= vectorizeRootInstruction(nullptr, PI, + P->getIncomingBlock(I), R, TTI); + } continue; } - } - if (isa(it)) - PostProcessInstructions.insert(&*it); + // Ran into an instruction without users, like terminator, or function + // call with ignored return value, store. Ignore unused instructions + // (basing on instruction type, except for CallInst and InvokeInst). + if (it->use_empty() && + (it->getType()->isVoidTy() || isa(it))) { + KeyNodes.insert(&*it); + bool OpsChanged = false; + auto *SI = dyn_cast(it); + bool TryToVectorizeRoot = ShouldStartVectorizeHorAtStore || !SI; + if (SI) { + auto I = Stores.find(getUnderlyingObject(SI->getPointerOperand())); + // Try to vectorize chain in store, if this is the only store to the + // address in the block. + // TODO: This is just a temporarily solution to save compile time. + // Need to investigate if we can safely turn on + // slp-vectorize-hor-store instead to allow lookup for reduction + // chains in all non-vectorized stores (need to check side effects and + // compile time). + TryToVectorizeRoot = (I == Stores.end() || I->second.size() == 1) && + SI->getValueOperand()->hasOneUse(); + } + if (TryToVectorizeRoot) { + for (auto *V : it->operand_values()) { + // Postponed instructions should not be vectorized here, delay their + // vectorization. + if (auto *VI = dyn_cast(V); + VI && !PostProcessInstructions.contains(VI)) + // Try to match and vectorize a horizontal reduction. + OpsChanged |= vectorizeRootInstruction(nullptr, VI, BB, R, TTI); + } + } + // Start vectorization of post-process list of instructions from the + // top-tree instructions to try to vectorize as many instructions as + // possible. + OpsChanged |= vectorizeSimpleInstructions(PostProcessInstructions, BB, + R, it->isTerminator()); + if (OpsChanged) { + // We would like to start over since some instructions are deleted + // and the iterator may become invalid value. + Changed = true; + RestartIterationOverBB = true; + break; + } + } + + if (isa(it)) + PostProcessInstructions.insert(&*it); + } } return Changed;