Index: llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ llvm/trunk/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -106,11 +106,13 @@ std::pair, ArrayRef> splitOddVectorElts(ArrayRef Chain, unsigned ElementSizeBits); - /// Checks if there are any instructions which may affect the memory accessed - /// in the chain between \p From and \p To. The elements of \p Chain should be - /// all loads or all stores. - bool isVectorizable(ArrayRef Chain, BasicBlock::iterator From, - BasicBlock::iterator To); + /// Checks for instructions which may affect the memory accessed + /// in the chain between \p From and \p To. Returns Index, where + /// \p Chain[0, Index) is the largest vectorizable chain prefix. + /// The elements of \p Chain should be all loads or all stores. + unsigned getVectorizablePrefixEndIdx(ArrayRef Chain, + BasicBlock::iterator From, + BasicBlock::iterator To); /// Collects load and store instructions to vectorize. void collectInstructions(BasicBlock *BB); @@ -123,10 +125,12 @@ bool vectorizeInstructions(ArrayRef Instrs); /// Vectorizes the load instructions in Chain. - bool vectorizeLoadChain(ArrayRef Chain); + bool vectorizeLoadChain(ArrayRef Chain, + SmallPtrSet *InstructionsProcessed); /// Vectorizes the store instructions in Chain. - bool vectorizeStoreChain(ArrayRef Chain); + bool vectorizeStoreChain(ArrayRef Chain, + SmallPtrSet *InstructionsProcessed); /// Check if this load/store access is misaligned accesses bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace, @@ -421,50 +425,53 @@ return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft)); } -bool Vectorizer::isVectorizable(ArrayRef Chain, - BasicBlock::iterator From, - BasicBlock::iterator To) { +unsigned Vectorizer::getVectorizablePrefixEndIdx(ArrayRef Chain, + BasicBlock::iterator From, + BasicBlock::iterator To) { SmallVector, 16> MemoryInstrs; SmallVector, 16> ChainInstrs; - unsigned Idx = 0; - for (auto I = From, E = To; I != E; ++I, ++Idx) { + unsigned InstrIdx = 0; + for (auto I = From; I != To; ++I, ++InstrIdx) { if (isa(I) || isa(I)) { if (!is_contained(Chain, &*I)) - MemoryInstrs.push_back({&*I, Idx}); + MemoryInstrs.push_back({&*I, InstrIdx}); else - ChainInstrs.push_back({&*I, Idx}); + ChainInstrs.push_back({&*I, InstrIdx}); } else if (I->mayHaveSideEffects()) { DEBUG(dbgs() << "LSV: Found side-effecting operation: " << *I << '\n'); - return false; + return 0; } } assert(Chain.size() == ChainInstrs.size() && "All instructions in the Chain must exist in [From, To)."); - for (auto EntryMem : MemoryInstrs) { - Value *V = EntryMem.first; - unsigned VIdx = EntryMem.second; - for (auto EntryChain : ChainInstrs) { - Value *VV = EntryChain.first; - unsigned VVIdx = EntryChain.second; - if (isa(V) && isa(VV)) + unsigned ChainIdx = 0; + for (auto EntryChain : ChainInstrs) { + Value *ChainInstrValue = EntryChain.first; + unsigned ChainInstrIdx = EntryChain.second; + for (auto EntryMem : MemoryInstrs) { + Value *MemInstrValue = EntryMem.first; + unsigned MemInstrIdx = EntryMem.second; + if (isa(MemInstrValue) && isa(ChainInstrValue)) continue; // We can ignore the alias as long as the load comes before the store, // because that means we won't be moving the load past the store to // vectorize it (the vectorized load is inserted at the location of the // first load in the chain). - if (isa(V) && isa(VV) && VVIdx < VIdx) + if (isa(MemInstrValue) && isa(ChainInstrValue) && + ChainInstrIdx < MemInstrIdx) continue; // Same case, but in reverse. - if (isa(V) && isa(VV) && VVIdx > VIdx) + if (isa(MemInstrValue) && isa(ChainInstrValue) && + ChainInstrIdx > MemInstrIdx) continue; - Instruction *M0 = cast(V); - Instruction *M1 = cast(VV); + Instruction *M0 = cast(MemInstrValue); + Instruction *M1 = cast(ChainInstrValue); if (!AA.isNoAlias(MemoryLocation::get(M0), MemoryLocation::get(M1))) { DEBUG({ @@ -473,17 +480,17 @@ dbgs() << "LSV: Found alias.\n" " Aliasing instruction and pointer:\n" - << *V << " aliases " << *Ptr0 << '\n' + << *MemInstrValue << " aliases " << *Ptr0 << '\n' << " Aliased instruction and pointer:\n" - << *VV << " aliases " << *Ptr1 << '\n'; + << *ChainInstrValue << " aliases " << *Ptr1 << '\n'; }); - return false; + return ChainIdx; } } + ChainIdx++; } - - return true; + return Chain.size(); } void Vectorizer::collectInstructions(BasicBlock *BB) { @@ -614,10 +621,19 @@ } bool Changed = false; - SmallPtrSet VectorizedValues; + SmallPtrSet InstructionsProcessed; for (int Head : Heads) { - if (Tails.count(Head)) + if (InstructionsProcessed.count(Instrs[Head])) + continue; + bool longerChainExists = false; + for (unsigned TIt = 0; TIt < Tails.size(); TIt++) + if (Head == Tails[TIt] && + !InstructionsProcessed.count(Instrs[Heads[TIt]])) { + longerChainExists = true; + break; + } + if (longerChainExists) continue; // We found an instr that starts a chain. Now follow the chain and try to @@ -625,7 +641,7 @@ SmallVector Operands; int I = Head; while (I != -1 && (Tails.count(I) || Heads.count(I))) { - if (VectorizedValues.count(Instrs[I])) + if (InstructionsProcessed.count(Instrs[I])) break; Operands.push_back(Instrs[I]); @@ -634,20 +650,18 @@ bool Vectorized = false; if (isa(*Operands.begin())) - Vectorized = vectorizeLoadChain(Operands); + Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed); else - Vectorized = vectorizeStoreChain(Operands); + Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed); - // Mark the vectorized instructions so that we don't vectorize them again. - if (Vectorized) - VectorizedValues.insert(Operands.begin(), Operands.end()); Changed |= Vectorized; } return Changed; } -bool Vectorizer::vectorizeStoreChain(ArrayRef Chain) { +bool Vectorizer::vectorizeStoreChain( + ArrayRef Chain, SmallPtrSet *InstructionsProcessed) { StoreInst *S0 = cast(Chain[0]); // If the vector has an int element, default to int for the whole load. @@ -670,8 +684,28 @@ unsigned VF = VecRegSize / Sz; unsigned ChainSize = Chain.size(); - if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) + if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) { + InstructionsProcessed->insert(Chain.begin(), Chain.end()); + return false; + } + + BasicBlock::iterator First, Last; + std::tie(First, Last) = getBoundaryInstrs(Chain); + unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last); + if (StopChain == 0) { + // There exists a side effect instruction, no vectorization possible. + InstructionsProcessed->insert(Chain.begin(), Chain.end()); return false; + } + if (StopChain == 1) { + // Failed after the first instruction. Discard it and try the smaller chain. + InstructionsProcessed->insert(Chain.front()); + return false; + } + + // Update Chain to the valid vectorizable subchain. + Chain = Chain.slice(0, StopChain); + ChainSize = Chain.size(); // Store size should be 1B, 2B or multiple of 4B. // TODO: Target hook for size constraint? @@ -680,11 +714,12 @@ DEBUG(dbgs() << "LSV: Size should be 1B, 2B " "or multiple of 4B. Splitting.\n"); if (SzInBytes == 3) - return vectorizeStoreChain(Chain.slice(0, ChainSize - 1)); + return vectorizeStoreChain(Chain.slice(0, ChainSize - 1), + InstructionsProcessed); auto Chains = splitOddVectorElts(Chain, Sz); - return vectorizeStoreChain(Chains.first) | - vectorizeStoreChain(Chains.second); + return vectorizeStoreChain(Chains.first, InstructionsProcessed) | + vectorizeStoreChain(Chains.second, InstructionsProcessed); } VectorType *VecTy; @@ -700,8 +735,8 @@ if (ChainSize > VF) { DEBUG(dbgs() << "LSV: Vector factor is too big." " Creating two separate arrays.\n"); - return vectorizeStoreChain(Chain.slice(0, VF)) | - vectorizeStoreChain(Chain.slice(VF)); + return vectorizeStoreChain(Chain.slice(0, VF), InstructionsProcessed) | + vectorizeStoreChain(Chain.slice(VF), InstructionsProcessed); } DEBUG({ @@ -710,6 +745,10 @@ V->dump(); }); + // We won't try again to vectorize the elements of the chain, regardless of + // whether we succeed below. + InstructionsProcessed->insert(Chain.begin(), Chain.end()); + // Check alignment restrictions. unsigned Alignment = getAlignment(S0); @@ -729,12 +768,6 @@ } } - BasicBlock::iterator First, Last; - std::tie(First, Last) = getBoundaryInstrs(Chain); - - if (!isVectorizable(Chain, First, Last)) - return false; - // Set insert point. Builder.SetInsertPoint(&*Last); @@ -782,7 +815,8 @@ return true; } -bool Vectorizer::vectorizeLoadChain(ArrayRef Chain) { +bool Vectorizer::vectorizeLoadChain( + ArrayRef Chain, SmallPtrSet *InstructionsProcessed) { LoadInst *L0 = cast(Chain[0]); // If the vector has an int element, default to int for the whole load. @@ -805,8 +839,28 @@ unsigned VF = VecRegSize / Sz; unsigned ChainSize = Chain.size(); - if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) + if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) { + InstructionsProcessed->insert(Chain.begin(), Chain.end()); + return false; + } + + BasicBlock::iterator First, Last; + std::tie(First, Last) = getBoundaryInstrs(Chain); + unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last); + if (StopChain == 0) { + // There exists a side effect instruction, no vectorization possible. + InstructionsProcessed->insert(Chain.begin(), Chain.end()); + return false; + } + if (StopChain == 1) { + // Failed after the first instruction. Discard it and try the smaller chain. + InstructionsProcessed->insert(Chain.front()); return false; + } + + // Update Chain to the valid vectorizable subchain. + Chain = Chain.slice(0, StopChain); + ChainSize = Chain.size(); // Load size should be 1B, 2B or multiple of 4B. // TODO: Should size constraint be a target hook? @@ -815,9 +869,11 @@ DEBUG(dbgs() << "LSV: Size should be 1B, 2B " "or multiple of 4B. Splitting.\n"); if (SzInBytes == 3) - return vectorizeLoadChain(Chain.slice(0, ChainSize - 1)); + return vectorizeLoadChain(Chain.slice(0, ChainSize - 1), + InstructionsProcessed); auto Chains = splitOddVectorElts(Chain, Sz); - return vectorizeLoadChain(Chains.first) | vectorizeLoadChain(Chains.second); + return vectorizeLoadChain(Chains.first, InstructionsProcessed) | + vectorizeLoadChain(Chains.second, InstructionsProcessed); } VectorType *VecTy; @@ -833,10 +889,14 @@ if (ChainSize > VF) { DEBUG(dbgs() << "LSV: Vector factor is too big. " "Creating two separate arrays.\n"); - return vectorizeLoadChain(Chain.slice(0, VF)) | - vectorizeLoadChain(Chain.slice(VF)); + return vectorizeLoadChain(Chain.slice(0, VF), InstructionsProcessed) | + vectorizeLoadChain(Chain.slice(VF), InstructionsProcessed); } + // We won't try again to vectorize the elements of the chain, regardless of + // whether we succeed below. + InstructionsProcessed->insert(Chain.begin(), Chain.end()); + // Check alignment restrictions. unsigned Alignment = getAlignment(L0); @@ -862,12 +922,6 @@ V->dump(); }); - BasicBlock::iterator First, Last; - std::tie(First, Last) = getBoundaryInstrs(Chain); - - if (!isVectorizable(Chain, First, Last)) - return false; - // Set insert point. Builder.SetInsertPoint(&*First); Index: llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll =================================================================== --- llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll +++ llvm/trunk/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll @@ -48,8 +48,7 @@ ; CHECK-LABEL: @chain_suffix( ; CHECK: load i32 ; CHECK: store <2 x i32> -; CHECK: load i32 -; CHECK: load i32 +; CHECK: load <2 x i32> define void @chain_suffix(i32* noalias %ptr) { %next.gep = getelementptr i32, i32* %ptr, i64 0 %next.gep1 = getelementptr i32, i32* %ptr, i64 1 @@ -66,12 +65,9 @@ ; CHECK-LABEL: @chain_prefix_suffix( -; CHECK: load i32 -; CHECK: load i32 +; CHECK: load <2 x i32> ; CHECK: store <2 x i32> -; CHECK: load i32 -; CHECK: load i32 -; CHECK: load i32 +; CHECK: load <3 x i32> define void @chain_prefix_suffix(i32* noalias %ptr) { %next.gep = getelementptr i32, i32* %ptr, i64 0 %next.gep1 = getelementptr i32, i32* %ptr, i64 1