Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -368,10 +368,10 @@ BoUpSLP(Function *Func, ScalarEvolution *Se, TargetTransformInfo *Tti, TargetLibraryInfo *TLi, AliasAnalysis *Aa, LoopInfo *Li, - DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB) + DominatorTree *Dt, AssumptionCache *AC, DemandedBits *DB, const DataLayout *DL) : NumLoadsWantToKeepOrder(0), NumLoadsWantToChangeOrder(0), F(Func), SE(Se), TTI(Tti), TLI(TLi), AA(Aa), LI(Li), DT(Dt), AC(AC), DB(DB), - Builder(Se->getContext()) { + DL(DL), Builder(Se->getContext()) { CodeMetrics::collectEphemeralValues(F, AC, EphValues); } @@ -925,6 +925,7 @@ DominatorTree *DT; AssumptionCache *AC; DemandedBits *DB; + const DataLayout *DL; /// Instruction builder to construct the vectorized tree. IRBuilder<> Builder; @@ -1176,11 +1177,10 @@ // loading/storing it as an i8 struct. If we vectorize loads/stores from // such a struct we read/write packed bits disagreeing with the // unvectorized version. 
- const DataLayout &DL = F->getParent()->getDataLayout(); Type *ScalarTy = VL[0]->getType(); - if (DL.getTypeSizeInBits(ScalarTy) != - DL.getTypeAllocSizeInBits(ScalarTy)) { + if (DL->getTypeSizeInBits(ScalarTy) != + DL->getTypeAllocSizeInBits(ScalarTy)) { BS.cancelScheduling(VL); newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); @@ -1196,8 +1196,8 @@ return; } - if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) { - if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], DL, *SE)) { + if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { + if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], *DL, *SE)) { ++NumLoadsWantToChangeOrder; } BS.cancelScheduling(VL); @@ -1366,10 +1366,9 @@ return; } case Instruction::Store: { - const DataLayout &DL = F->getParent()->getDataLayout(); // Check if the stores are consecutive or of we need to swizzle them. for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) - if (!isConsecutiveAccess(VL[i], VL[i + 1], DL, *SE)) { + if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { BS.cancelScheduling(VL); newTreeEntry(VL, false); DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); @@ -1872,8 +1871,6 @@ void BoUpSLP::reorderAltShuffleOperands(ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left, SmallVectorImpl<Value *> &Right) { - const DataLayout &DL = F->getParent()->getDataLayout(); - // Push left and right operands of binary operation into Left and Right for (unsigned i = 0, e = VL.size(); i < e; ++i) { Left.push_back(cast<Instruction>(VL[i])->getOperand(0)); @@ -1887,10 +1884,10 @@ if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) { Instruction *VL1 = cast<Instruction>(VL[j]); Instruction *VL2 = cast<Instruction>(VL[j + 1]); - if (VL1->isCommutative() && isConsecutiveAccess(L, L1, DL, *SE)) { + if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) { std::swap(Left[j], Right[j]); continue; - } else if (VL2->isCommutative() && isConsecutiveAccess(L, L1, DL, *SE)) { + } else if (VL2->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) { 
std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -1901,10 +1898,10 @@ if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) { Instruction *VL1 = cast<Instruction>(VL[j]); Instruction *VL2 = cast<Instruction>(VL[j + 1]); - if (VL1->isCommutative() && isConsecutiveAccess(L, L1, DL, *SE)) { + if (VL1->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) { std::swap(Left[j], Right[j]); continue; - } else if (VL2->isCommutative() && isConsecutiveAccess(L, L1, DL, *SE)) { + } else if (VL2->isCommutative() && isConsecutiveAccess(L, L1, *DL, *SE)) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -2034,8 +2031,6 @@ if (SplatRight || SplatLeft) return; - const DataLayout &DL = F->getParent()->getDataLayout(); - // Finally check if we can get longer vectorizable chain by reordering // without breaking the good operand order detected above. // E.g. If we have something like- @@ -2054,7 +2049,7 @@ for (unsigned j = 0; j < VL.size() - 1; ++j) { if (LoadInst *L = dyn_cast<LoadInst>(Left[j])) { if (LoadInst *L1 = dyn_cast<LoadInst>(Right[j + 1])) { - if (isConsecutiveAccess(L, L1, DL, *SE)) { + if (isConsecutiveAccess(L, L1, *DL, *SE)) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -2062,7 +2057,7 @@ } if (LoadInst *L = dyn_cast<LoadInst>(Right[j])) { if (LoadInst *L1 = dyn_cast<LoadInst>(Left[j + 1])) { - if (isConsecutiveAccess(L, L1, DL, *SE)) { + if (isConsecutiveAccess(L, L1, *DL, *SE)) { std::swap(Left[j + 1], Right[j + 1]); continue; } @@ -2158,7 +2153,6 @@ return Gather(E->Scalars, VecTy); } - const DataLayout &DL = F->getParent()->getDataLayout(); unsigned Opcode = getSameOpcode(E->Scalars); switch (Opcode) { @@ -2355,7 +2349,7 @@ unsigned Alignment = LI->getAlignment(); LI = Builder.CreateLoad(VecPtr); if (!Alignment) { - Alignment = DL.getABITypeAlignment(ScalarLoadTy); + Alignment = DL->getABITypeAlignment(ScalarLoadTy); } LI->setAlignment(Alignment); E->VectorizedValue = LI; @@ -2386,7 +2380,7 @@ ExternalUser(SI->getPointerOperand(), cast<User>(VecPtr), 0)); if (!Alignment) { - Alignment = 
DL.getABITypeAlignment(SI->getValueOperand()->getType()); + Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType()); } S->setAlignment(Alignment); E->VectorizedValue = S; @@ -3133,12 +3127,10 @@ } unsigned BoUpSLP::getVectorElementSize(Value *V) { - auto &DL = F->getParent()->getDataLayout(); - // If V is a store, just return the width of the stored value without // traversing the expression tree. This is the common case. if (auto *Store = dyn_cast<StoreInst>(V)) - return DL.getTypeSizeInBits(Store->getValueOperand()->getType()); + return DL->getTypeSizeInBits(Store->getValueOperand()->getType()); // If V is not a store, we can traverse the expression tree to find loads // that feed it. The type of the loaded value may indicate a more suitable @@ -3166,7 +3158,7 @@ // If the current instruction is a load, update MaxWidth to reflect the // width of the loaded value. else if (isa<LoadInst>(I)) - MaxWidth = std::max<unsigned>(MaxWidth, DL.getTypeSizeInBits(Ty)); + MaxWidth = std::max<unsigned>(MaxWidth, DL->getTypeSizeInBits(Ty)); // Otherwise, we need to visit the operands of the instruction. We only // handle the interesting cases from buildTree here. If an operand is an @@ -3187,7 +3179,7 @@ // If we didn't encounter a memory access in the expression tree, or if we // gave up for some reason, just return the width of V. if (!MaxWidth || FoundUnknownInst) - return DL.getTypeSizeInBits(V->getType()); + return DL->getTypeSizeInBits(V->getType()); // Otherwise, return the maximum width we found. return MaxWidth; @@ -3265,8 +3257,6 @@ } void BoUpSLP::computeMinimumValueSizes() { - auto &DL = F->getParent()->getDataLayout(); - // If there are no external uses, the expression tree must be rooted by a // store. We can't demote in-memory values, so there is nothing to do here. if (ExternalUses.empty()) @@ -3334,11 +3324,11 @@ // We start by looking at each entry that can be demoted. 
We compute the // maximum bit width required to store the scalar by using ValueTracking to // compute the number of high-order bits we can truncate. - if (MaxBitWidth == DL.getTypeSizeInBits(TreeRoot[0]->getType())) { + if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType())) { MaxBitWidth = 8u; for (auto *Scalar : ToDemote) { - auto NumSignBits = ComputeNumSignBits(Scalar, DL, 0, AC, 0, DT); - auto NumTypeBits = DL.getTypeSizeInBits(Scalar->getType()); + auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, 0, DT); + auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType()); MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth); } } @@ -3385,6 +3375,12 @@ DominatorTree *DT; AssumptionCache *AC; DemandedBits *DB; + const DataLayout *DL; + + bool doInitialization(Module &M) override { + DL = &M.getDataLayout(); + return false; + } bool runOnFunction(Function &F) override { if (skipOptnoneFunction(F)) @@ -3430,7 +3426,7 @@ // Use the bottom up slp vectorizer to construct chains that start with // store instructions. - BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB); + BoUpSLP R(&F, SE, TTI, TLI, AA, LI, DT, AC, DB, DL); // A general note: the vectorizer must use BoUpSLP::eraseInstruction() to // delete instructions. @@ -3611,7 +3607,6 @@ // all of the pairs of stores that follow each other. SmallVector<unsigned, 16> IndexQueue; for (unsigned i = 0, e = Stores.size(); i < e; ++i) { - const DataLayout &DL = Stores[i]->getModule()->getDataLayout(); IndexQueue.clear(); // If a store has multiple consecutive store candidates, search Stores // array according to the sequence: from i+1 to e, then from i-1 to 0. 
@@ -3624,7 +3619,7 @@ IndexQueue.push_back(j - 1); for (auto &k : IndexQueue) { - if (isConsecutiveAccess(Stores[i], Stores[k], DL, *SE)) { + if (isConsecutiveAccess(Stores[i], Stores[k], *DL, *SE)) { Tails.insert(Stores[k]); Heads.insert(Stores[i]); ConsecutiveChain[Stores[i]] = Stores[k]; @@ -3673,7 +3668,6 @@ Stores.clear(); GEPs.clear(); NumStores = NumGEPs = 0; - const DataLayout &DL = BB->getModule()->getDataLayout(); // Visit the store and getelementptr instructions in BB and organize them in // Stores and GEPs according to the underlying objects of their pointer @@ -3687,7 +3681,7 @@ continue; if (!isValidElementType(SI->getValueOperand()->getType())) continue; - Stores[GetUnderlyingObject(SI->getPointerOperand(), DL)].push_back(SI); + Stores[GetUnderlyingObject(SI->getPointerOperand(), *DL)].push_back(SI); ++NumStores; } @@ -3700,7 +3694,7 @@ continue; if (!isValidElementType(Idx->getType())) continue; - GEPs[GetUnderlyingObject(GEP->getPointerOperand(), DL)].push_back(GEP); + GEPs[GetUnderlyingObject(GEP->getPointerOperand(), *DL)].push_back(GEP); ++NumGEPs; } }