Index: /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
===================================================================
--- /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ /Users/rriddle/Desktop/llvm/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -82,7 +82,9 @@
   PointerOffsetPair getPointerOffsetPair(LoadInst &);
   bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &);
   bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &);
-  bool combineLoads(SmallVectorImpl<LoadPOPPair> &);
+  bool combineLoads(SmallVectorImpl<LoadPOPPair> &, int = 0);
+  bool combineLoadsSection(SmallVectorImpl<LoadPOPPair> &, unsigned, unsigned,
+                           unsigned);
 };
 }
 
@@ -174,16 +176,49 @@
 }
 
 /// \brief Given a list of combinable load. Combine the maximum number of them.
-bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) {
-  // Remove loads from the end while the size is not a power of 2.
-  unsigned TotalSize = 0;
-  for (const auto &L : Loads)
-    TotalSize += L.Load->getType()->getPrimitiveSizeInBits();
-  while (TotalSize != 0 && !isPowerOf2_32(TotalSize))
-    TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits();
-  if (Loads.size() < 2)
-    return false;
+///
+/// Starting at index \p Offset, the loads are split greedily into consecutive
+/// sections whose total bit width is a power of two; every section holding
+/// more than one load is handed to combineLoadsSection().
+/// \returns true if at least one section was combined.
+bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads,
+                               int Offset) {
+  bool Combined = false;
+  const unsigned NumLoads = Loads.size();
+
+  // Walk the list section by section rather than recursing. An Offset that is
+  // negative or past the end fails the bound check, so nothing is combined.
+  unsigned Begin = static_cast<unsigned>(Offset);
+  while (Begin < NumLoads && NumLoads - Begin >= 2) {
+    // Remove loads from the end while the size is not a power of 2.
+    unsigned TotalSize = 0;
+    for (unsigned I = Begin; I != NumLoads; ++I)
+      TotalSize += Loads[I].Load->getType()->getPrimitiveSizeInBits();
+    unsigned End = NumLoads;
+    while (TotalSize != 0 && !isPowerOf2_32(TotalSize)) {
+      --End;
+      TotalSize -= Loads[End].Load->getType()->getPrimitiveSizeInBits();
+    }
+
+    // Combine this section, but only if it holds more than one load.
+    if (End - Begin > 1 &&
+        combineLoadsSection(Loads, Begin, End - Begin, TotalSize))
+      Combined = true;
+
+    // Always advance by at least one load. When no prefix starting at Begin
+    // has a power-of-2 size (e.g. a leading i24 load), End == Begin, and
+    // restarting from End would repeat the same work forever.
+    Begin = End > Begin ? End : Begin + 1;
+  }
+
+  return Combined;
+}
 
+/// Combine the \p Count loads beginning at index \p Offset of \p Loads into a
+/// single integer load of \p TotalSize bits.
+bool LoadCombine::combineLoadsSection(SmallVectorImpl<LoadPOPPair> &Loads,
+                                      unsigned Offset, unsigned Count,
+                                      unsigned TotalSize) {
   DEBUG({
     dbgs() << "***** Combining Loads ******\n";
     for (const auto &L : Loads) {
@@ -191,38 +226,48 @@
     }
   });
 
+  // Begin and end iterators depending on the Offset and count.
+  const auto LoadsBegin = Loads.begin() + Offset;
+  const auto LoadsEnd = LoadsBegin + Count;
+
   // Find first load. This is where we put the new load.
   LoadPOPPair FirstLP;
   FirstLP.InsertOrder = -1u;
-  for (const auto &L : Loads)
-    if (L.InsertOrder < FirstLP.InsertOrder)
-      FirstLP = L;
+  for (auto L = LoadsBegin; L != LoadsEnd; ++L)
+    if (L->InsertOrder < FirstLP.InsertOrder)
+      FirstLP = *L;
 
+  // Get the address space.
   unsigned AddressSpace =
       FirstLP.POP.Pointer->getType()->getPointerAddressSpace();
 
+  // The offset into the base pointer.
+  int64_t PtrOffset = Loads[Offset].POP.Offset;
+
+  // Create a new gep for this pointer and a loadinst for the larger load.
   Builder->SetInsertPoint(FirstLP.Load);
   Value *Ptr = Builder->CreateConstGEP1_64(
-      Builder->CreatePointerCast(Loads[0].POP.Pointer,
+      Builder->CreatePointerCast(Loads[Offset].POP.Pointer,
                                  Builder->getInt8PtrTy(AddressSpace)),
-      Loads[0].POP.Offset);
+      PtrOffset);
   LoadInst *NewLoad = new LoadInst(
       Builder->CreatePointerCast(
           Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize),
                                 Ptr->getType()->getPointerAddressSpace())),
-      Twine(Loads[0].Load->getName()) + ".combined", false,
-      Loads[0].Load->getAlignment(), FirstLP.Load);
-
-  for (const auto &L : Loads) {
+      Twine(Loads[Offset].Load->getName()) + ".combined", false,
+      Loads[Offset].Load->getAlignment(), FirstLP.Load);
+  // Create extracts for each of the loads.
+  for (auto LIter = LoadsBegin; LIter != LoadsEnd; ++LIter) {
+    auto &L = *LIter;
     Builder->SetInsertPoint(L.Load);
     Value *V = Builder->CreateExtractInteger(
         L.Load->getModule()->getDataLayout(), NewLoad,
-        cast<IntegerType>(L.Load->getType()),
-        L.POP.Offset - Loads[0].POP.Offset, "combine.extract");
+        cast<IntegerType>(L.Load->getType()), L.POP.Offset - PtrOffset,
+        "combine.extract");
     L.Load->replaceAllUsesWith(V);
   }
 
-  NumLoadsCombined = NumLoadsCombined + Loads.size();
+  NumLoadsCombined = NumLoadsCombined + Count;
   return true;
 }
 
Index: /Users/rriddle/Desktop/llvm/llvm/test/Transforms/LoadCombine/load-combine-multiloadchains.ll
===================================================================
--- /Users/rriddle/Desktop/llvm/llvm/test/Transforms/LoadCombine/load-combine-multiloadchains.ll
+++ /Users/rriddle/Desktop/llvm/llvm/test/Transforms/LoadCombine/load-combine-multiloadchains.ll
@@ -0,0 +1,23 @@
+; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @Load_MultiChain(i32* %i){
+  %1 = getelementptr inbounds i32, i32* %i, i64 1
+  %2 = load i32, i32* %1, align 4
+  %3 = load i32, i32* %i, align 4
+  %4 = getelementptr inbounds i32, i32* %i, i64 2
+  %5 = load i32, i32* %4, align 4
+  %6 = getelementptr inbounds i32, i32* %i, i64 3
+  %7 = load i32, i32* %6, align 4
+  %8 = getelementptr inbounds i32, i32* %i, i64 4
+  %9 = load i32, i32* %8, align 4
+  %10 = getelementptr inbounds i32, i32* %i, i64 5
+  %11 = load i32, i32* %10, align 4
+  %12 = add nsw i32 %3, %11
+  ret i32 %12
+; CHECK-LABEL: @Load_MultiChain(
+; CHECK: load i128, i128* %{{.*}}, align 4
+; CHECK: load i64, i64* %{{.*}}, align 4
+; CHECK-NOT: load i32
+}