Index: lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -107,10 +107,11 @@ splitOddVectorElts(ArrayRef Chain, unsigned ElementSizeBits); /// Checks if there are any instructions which may affect the memory accessed - /// in the chain between \p From and \p To. The elements of \p Chain should be - /// all loads or all stores. - bool isVectorizable(ArrayRef Chain, BasicBlock::iterator From, - BasicBlock::iterator To); + /// in the chain between \p From and \p To. Return Index, where + /// \p Chain[0, Index) is the largest vectorizable chain prefix. The elements + /// of \p Chain should be all loads or all stores. + unsigned getVectorizableEnd(ArrayRef Chain, BasicBlock::iterator From, + BasicBlock::iterator To); /// Collects load and store instructions to vectorize. void collectInstructions(BasicBlock *BB); @@ -123,10 +124,12 @@ bool vectorizeInstructions(ArrayRef Instrs); /// Vectorizes the load instructions in Chain. - bool vectorizeLoadChain(ArrayRef Chain); + bool vectorizeLoadChain(ArrayRef Chain, + SmallVector *InstructionsProcessed); /// Vectorizes the store instructions in Chain. - bool vectorizeStoreChain(ArrayRef Chain); + bool vectorizeStoreChain(ArrayRef Chain, + SmallVector *InstructionsProcessed); /// Check if this load/store access is misaligned accesses bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace, @@ -347,11 +350,11 @@ if (!IM || IM->getOpcode() == Instruction::PHI) continue; - if (!DT.dominates(IM, IW)) { + if (!DT.dominates(IM, I)) { InstructionsToMove.insert(IM); Worklist.push_back(IM); - DEBUG(assert(IM->getParent() == IW->getParent() && - "Instructions to move should be in the same basic block")); + assert(IM->getParent() == IW->getParent() && + "Instructions to move should be in the same basic block"); } } } @@ -422,9 +425,9 @@ return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft)); } -bool Vectorizer::isVectorizable(ArrayRef Chain, - BasicBlock::iterator From, - BasicBlock::iterator To) { +unsigned Vectorizer::getVectorizableEnd(ArrayRef Chain, + BasicBlock::iterator From, + BasicBlock::iterator To) { SmallVector, 16> MemoryInstrs; SmallVector, 16> ChainInstrs; @@ -437,19 +440,20 @@ ChainInstrs.push_back({&*I, Idx}); } else if (I->mayHaveSideEffects()) { DEBUG(dbgs() << "LSV: Found side-effecting operation: " << *I << '\n'); - return false; + return 0; } } assert(Chain.size() == ChainInstrs.size() && "All instructions in the Chain must exist in [From, To)."); - for (auto EntryMem : MemoryInstrs) { - Value *V = EntryMem.first; - unsigned VIdx = EntryMem.second; - for (auto EntryChain : ChainInstrs) { - Value *VV = EntryChain.first; - unsigned VVIdx = EntryChain.second; + unsigned Index = 0; + for (auto EntryChain : ChainInstrs) { + Value *VV = EntryChain.first; + unsigned VVIdx = EntryChain.second; + for (auto EntryMem : MemoryInstrs) { + Value *V = EntryMem.first; + unsigned VIdx = EntryMem.second; if (isa(V) && isa(VV)) continue; @@ -479,12 +483,12 @@ << *VV << " aliases " << *Ptr1 << '\n'; }); - return false; + return Index; } } + Index++; } - - return true; + return Chain.size(); } void Vectorizer::collectInstructions(BasicBlock *BB) { @@ -617,8 +621,16 @@ bool Changed = false; SmallPtrSet VectorizedValues; - for (int Head : Heads) { - if (Tails.count(Head)) + for (unsigned i = 0; i < Heads.size(); i++) { + int Head = Heads[i]; + if (VectorizedValues.count(Instrs[Head])) + continue; + for (unsigned j = 0; j < Tails.size(); j++) + if (Head == Tails[j] && !VectorizedValues.count(Instrs[Heads[j]])) { + Head = -1; + break; + } + if (Head < 0) continue; // We found an instr that starts a chain. Now follow the chain and try to @@ -634,21 +646,24 @@ } bool Vectorized = false; + SmallVector InstructionsProcessed; if (isa(*Operands.begin())) - Vectorized = vectorizeLoadChain(Operands); + Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed); else - Vectorized = vectorizeStoreChain(Operands); + Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed); - // Mark the vectorized instructions so that we don't vectorize them again. - if (Vectorized) - VectorizedValues.insert(Operands.begin(), Operands.end()); + // Mark the processed instructions so that we don't try to + // vectorize them again. + if (InstructionsProcessed.size()) + VectorizedValues.insert(InstructionsProcessed.begin(), InstructionsProcessed.end()); Changed |= Vectorized; } return Changed; } -bool Vectorizer::vectorizeStoreChain(ArrayRef Chain) { +bool Vectorizer::vectorizeStoreChain(ArrayRef Chain, + SmallVector *InstructionsProcessed) { StoreInst *S0 = cast(Chain[0]); // If the vector has an int element, default to int for the whole load. @@ -671,8 +686,28 @@ unsigned VF = VecRegSize / Sz; unsigned ChainSize = Chain.size(); - if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) + if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) { + InstructionsProcessed->append(Chain.begin(), Chain.end()); + return false; + } + + BasicBlock::iterator First, Last; + std::tie(First, Last) = getBoundaryInstrs(Chain); + unsigned StopChain = getVectorizableEnd(Chain, First, Last); + if (StopChain == 0) { + // There exists a side effect instruction, no vectorization possible. + InstructionsProcessed->append(Chain.begin(), Chain.end()); return false; + } + if (StopChain == 1) { + // Failed after the first instruction. Discard it and try the smaller chain. + InstructionsProcessed->push_back(Chain.front()); + return false; + } + + // Update Chain to the valid vectorizable subchain. + Chain = Chain.slice(0, StopChain); + ChainSize = Chain.size(); // Store size should be 1B, 2B or multiple of 4B. // TODO: Target hook for size constraint? @@ -681,11 +716,11 @@ DEBUG(dbgs() << "LSV: Size should be 1B, 2B " "or multiple of 4B. Splitting.\n"); if (SzInBytes == 3) - return vectorizeStoreChain(Chain.slice(0, ChainSize - 1)); + return vectorizeStoreChain(Chain.slice(0, ChainSize - 1), InstructionsProcessed); auto Chains = splitOddVectorElts(Chain, Sz); - return vectorizeStoreChain(Chains.first) | - vectorizeStoreChain(Chains.second); + return vectorizeStoreChain(Chains.first, InstructionsProcessed) | + vectorizeStoreChain(Chains.second, InstructionsProcessed); } VectorType *VecTy; @@ -701,8 +736,8 @@ if (ChainSize > VF) { DEBUG(dbgs() << "LSV: Vector factor is too big." " Creating two separate arrays.\n"); - return vectorizeStoreChain(Chain.slice(0, VF)) | - vectorizeStoreChain(Chain.slice(VF)); + return vectorizeStoreChain(Chain.slice(0, VF), InstructionsProcessed) | + vectorizeStoreChain(Chain.slice(VF), InstructionsProcessed); } DEBUG({ @@ -711,6 +746,10 @@ V->dump(); }); + // We won't try again to vectorize the elements of the chain, regardless of + // whether we succeed below. + InstructionsProcessed->append(Chain.begin(), Chain.end()); + // Check alignment restrictions. unsigned Alignment = getAlignment(S0); @@ -730,12 +769,6 @@ } } - BasicBlock::iterator First, Last; - std::tie(First, Last) = getBoundaryInstrs(Chain); - - if (!isVectorizable(Chain, First, Last)) - return false; - // Set insert point. Builder.SetInsertPoint(&*Last); @@ -783,7 +816,8 @@ return true; } -bool Vectorizer::vectorizeLoadChain(ArrayRef Chain) { +bool Vectorizer::vectorizeLoadChain(ArrayRef Chain, + SmallVector *InstructionsProcessed) { LoadInst *L0 = cast(Chain[0]); // If the vector has an int element, default to int for the whole load. @@ -806,8 +840,28 @@ unsigned VF = VecRegSize / Sz; unsigned ChainSize = Chain.size(); - if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) + if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) { + InstructionsProcessed->append(Chain.begin(), Chain.end()); + return false; + } + + BasicBlock::iterator First, Last; + std::tie(First, Last) = getBoundaryInstrs(Chain); + unsigned StopChain = getVectorizableEnd(Chain, First, Last); + if (StopChain == 0) { + // There exists a side effect instruction, no vectorization possible. + InstructionsProcessed->append(Chain.begin(), Chain.end()); + return false; + } + if (StopChain == 1) { + // Failed after the first instruction. Discard it and try the smaller chain. + InstructionsProcessed->push_back(Chain.front()); return false; + } + + // Update Chain to the valid vectorizable subchain. + Chain = Chain.slice(0, StopChain); + ChainSize = Chain.size(); // Load size should be 1B, 2B or multiple of 4B. // TODO: Should size constraint be a target hook? @@ -816,9 +870,10 @@ DEBUG(dbgs() << "LSV: Size should be 1B, 2B " "or multiple of 4B. Splitting.\n"); if (SzInBytes == 3) - return vectorizeLoadChain(Chain.slice(0, ChainSize - 1)); + return vectorizeLoadChain(Chain.slice(0, ChainSize - 1), InstructionsProcessed); auto Chains = splitOddVectorElts(Chain, Sz); - return vectorizeLoadChain(Chains.first) | vectorizeLoadChain(Chains.second); + return vectorizeLoadChain(Chains.first, InstructionsProcessed) | + vectorizeLoadChain(Chains.second, InstructionsProcessed); } VectorType *VecTy; @@ -834,10 +889,14 @@ if (ChainSize > VF) { DEBUG(dbgs() << "LSV: Vector factor is too big. " "Creating two separate arrays.\n"); - return vectorizeLoadChain(Chain.slice(0, VF)) | - vectorizeLoadChain(Chain.slice(VF)); + return vectorizeLoadChain(Chain.slice(0, VF), InstructionsProcessed) | + vectorizeLoadChain(Chain.slice(VF), InstructionsProcessed); } + // We won't try again to vectorize the elements of the chain, regardless of + // whether we succeed below. + InstructionsProcessed->append(Chain.begin(), Chain.end()); + // Check alignment restrictions. unsigned Alignment = getAlignment(L0); @@ -863,12 +922,6 @@ V->dump(); }); - BasicBlock::iterator First, Last; - std::tie(First, Last) = getBoundaryInstrs(Chain); - - if (!isVectorizable(Chain, First, Last)) - return false; - // Set insert point. Builder.SetInsertPoint(&*First); Index: test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll =================================================================== --- test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll +++ test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll @@ -48,8 +48,7 @@ ; CHECK-LABEL: @chain_suffix( ; CHECK: load i32 ; CHECK: store <2 x i32> -; CHECK: load i32 -; CHECK: load i32 +; CHECK: load <2 x i32> define void @chain_suffix(i32* noalias %ptr) { %next.gep = getelementptr i32, i32* %ptr, i64 0 %next.gep1 = getelementptr i32, i32* %ptr, i64 1 @@ -66,12 +65,9 @@ ; CHECK-LABEL: @chain_prefix_suffix( -; CHECK: load i32 -; CHECK: load i32 +; CHECK: load <2 x i32> ; CHECK: store <2 x i32> -; CHECK: load i32 -; CHECK: load i32 -; CHECK: load i32 +; CHECK: load <3 x i32> define void @chain_prefix_suffix(i32* noalias %ptr) { %next.gep = getelementptr i32, i32* %ptr, i64 0 %next.gep1 = getelementptr i32, i32* %ptr, i64 1