Index: lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1153,8 +1153,7 @@
   /// Set the Builder insert point to one after the last instruction in
   /// the bundle
-  void setInsertPointAfterBundle(ArrayRef<Value *> VL,
-                                 const InstructionsState &S);
+  void setInsertPointAfterBundle(TreeEntry *E);
 
   /// \returns a vector from a collection of scalars in \p VL.
   Value *Gather(ArrayRef<Value *> VL, VectorType *Ty);
 
@@ -1220,6 +1219,10 @@
     /// reordering of operands during buildTree_rec() and vectorizeTree().
     SmallVector<ValueList, 2> Operands;
 
+    /// The main/alternate instruction.
+    Instruction *MainOp = nullptr;
+    Instruction *AltOp = nullptr;
+
   public:
     /// Set this bundle's \p OpIdx'th operand to \p OpVL.
     void setOperand(unsigned OpIdx, ArrayRef<Value *> OpVL,
@@ -1256,6 +1259,58 @@
       return Operands[OpIdx][0];
     }
 
+    /// Some of the instructions in the list have alternate opcodes.
+    bool isAltShuffle() const {
+      return getOpcode() != getAltOpcode();
+    }
+
+    bool isOpcodeOrAlt(Instruction *I) const {
+      unsigned CheckedOpcode = I->getOpcode();
+      return (getOpcode() == CheckedOpcode ||
+              getAltOpcode() == CheckedOpcode);
+    }
+
+    /// Chooses the correct key for scheduling data. If \p Op has the same (or
+    /// alternate) opcode as the main operation, the key is \p Op. Otherwise
+    /// the key is \p MainOp.
+    Value *isOneOf(Value *Op) const {
+      auto *I = dyn_cast<Instruction>(Op);
+      if (I && isOpcodeOrAlt(I))
+        return Op;
+      return MainOp;
+    }
+
+    void setOperations(InstructionsState *S) {
+      MainOp = S->MainOp;
+      AltOp = S->AltOp;
+    }
+
+    Instruction *getMainOp() const {
+      return MainOp;
+    }
+
+    Instruction *getAltOp() const {
+      return AltOp;
+    }
+
+    /// The main/alternate opcodes for the list of instructions.
+    unsigned getOpcode() const {
+      return MainOp ? MainOp->getOpcode() : 0;
+    }
+
+    unsigned getAltOpcode() const {
+      return AltOp ? AltOp->getOpcode() : 0;
+    }
+
+    bool updateStateIfReorder() {
+      if (!ReorderIndices.empty()) {
+        InstructionsState S = getSameOpcode(Scalars, ReorderIndices.front());
+        setOperations(&S);
+        return true;
+      }
+      return false;
+    }
+
 #ifndef NDEBUG
     /// Debug printer.
     LLVM_DUMP_METHOD void dump() const {
@@ -1269,6 +1324,8 @@
       for (Value *V : Scalars)
         dbgs().indent(2) << *V << "\n";
       dbgs() << "NeedToGather: " << NeedToGather << "\n";
+      dbgs() << "MainOp: " << *MainOp << "\n";
+      dbgs() << "AltOp: " << *AltOp << "\n";
       dbgs() << "VectorizedValue: ";
       if (VectorizedValue)
         dbgs() << *VectorizedValue;
@@ -1296,7 +1353,7 @@
 
   /// Create a new VectorizableTree entry.
   TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized,
-                          const EdgeInfo &UserTreeIdx,
+                          InstructionsState *S, const EdgeInfo &UserTreeIdx,
                           ArrayRef<unsigned> ReuseShuffleIndices = None,
                           ArrayRef<unsigned> ReorderIndices = None) {
     VectorizableTree.push_back(llvm::make_unique<TreeEntry>(VectorizableTree));
@@ -1307,10 +1364,11 @@
     Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(),
                                      ReuseShuffleIndices.end());
     Last->ReorderIndices = ReorderIndices;
+    Last->setOperations(S);
     if (Vectorized) {
       for (int i = 0, e = VL.size(); i != e; ++i) {
         assert(!getTreeEntry(VL[i]) && "Scalar already in tree!");
-        ScalarToTreeEntry[VL[i]] = Last->Idx;
+        ScalarToTreeEntry[VL[i]] = Last;
       }
     } else {
       MustGather.insert(VL.begin(), VL.end());
@@ -1338,21 +1396,21 @@
 #endif
 
   TreeEntry *getTreeEntry(Value *V) {
-    auto I = ScalarToTreeEntry.find(V);
-    if (I != ScalarToTreeEntry.end())
-      return VectorizableTree[I->second].get();
+    auto It = ScalarToTreeEntry.find(V);
+    if (It != ScalarToTreeEntry.end())
+      return It->second;
     return nullptr;
   }
 
   const TreeEntry *getTreeEntry(Value *V) const {
-    auto I = ScalarToTreeEntry.find(V);
-    if (I != ScalarToTreeEntry.end())
-      return VectorizableTree[I->second].get();
+    auto It = ScalarToTreeEntry.find(V);
+    if (It != ScalarToTreeEntry.end())
+      return It->second;
    return nullptr;
   }
 
   /// Maps a specific scalar to its tree entry.
-  SmallDenseMap<Value *, int> ScalarToTreeEntry;
+  SmallDenseMap<Value *, TreeEntry *> ScalarToTreeEntry;
 
   /// A list of scalars that we found that we need to keep as scalars.
   ValueSet MustGather;
@@ -2026,28 +2084,28 @@
   InstructionsState S = getSameOpcode(VL);
   if (Depth == RecursionMaxDepth) {
     LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
-    newTreeEntry(VL, false, UserTreeIdx);
+    newTreeEntry(VL, false, &S, UserTreeIdx);
     return;
   }
 
   // Don't handle vectors.
   if (S.OpValue->getType()->isVectorTy()) {
     LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n");
-    newTreeEntry(VL, false, UserTreeIdx);
+    newTreeEntry(VL, false, &S, UserTreeIdx);
     return;
   }
 
   if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue))
     if (SI->getValueOperand()->getType()->isVectorTy()) {
       LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n");
-      newTreeEntry(VL, false, UserTreeIdx);
+      newTreeEntry(VL, false, &S, UserTreeIdx);
       return;
     }
 
   // If all of the operands are identical or constant we have a simple solution.
   if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) {
     LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
-    newTreeEntry(VL, false, UserTreeIdx);
+    newTreeEntry(VL, false, &S, UserTreeIdx);
     return;
   }
 
@@ -2059,7 +2117,7 @@
     if (EphValues.count(VL[i])) {
       LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
                         << ") is ephemeral.\n");
-      newTreeEntry(VL, false, UserTreeIdx);
+      newTreeEntry(VL, false, &S, UserTreeIdx);
      return;
    }
  }
@@ -2069,7 +2127,7 @@
     LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n");
     if (!E->isSame(VL)) {
       LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n");
-      newTreeEntry(VL, false, UserTreeIdx);
+      newTreeEntry(VL, false, &S, UserTreeIdx);
       return;
     }
     // Record the reuse of the tree node.  FIXME, currently this is only used to
@@ -2089,7 +2147,7 @@
     if (getTreeEntry(I)) {
       LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i]
                         << ") is already in tree.\n");
-      newTreeEntry(VL, false, UserTreeIdx);
+      newTreeEntry(VL, false, &S, UserTreeIdx);
      return;
    }
  }
@@ -2100,7 +2158,7 @@
   for (unsigned i = 0, e = VL.size(); i != e; ++i) {
     if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) {
       LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n");
-      newTreeEntry(VL, false, UserTreeIdx);
+      newTreeEntry(VL, false, &S, UserTreeIdx);
      return;
    }
  }
@@ -2114,7 +2172,7 @@
     // Don't go into unreachable blocks. They may contain instructions with
     // dependency cycles which confuse the final scheduling.
     LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n");
-    newTreeEntry(VL, false, UserTreeIdx);
+    newTreeEntry(VL, false, &S, UserTreeIdx);
     return;
   }
 
@@ -2134,7 +2192,7 @@
     LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n");
     if (UniqueValues.size() <= 1 || !llvm::isPowerOf2_32(UniqueValues.size())) {
       LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n");
-      newTreeEntry(VL, false, UserTreeIdx);
+      newTreeEntry(VL, false, &S, UserTreeIdx);
       return;
     }
     VL = UniqueValues;
@@ -2151,7 +2209,7 @@
     assert((!BS.getScheduleData(VL0) ||
             !BS.getScheduleData(VL0)->isPartOfBundle()) &&
            "tryScheduleBundle should cancelScheduling on failure");
-    newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+    newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
     return;
   }
   LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
@@ -2172,12 +2230,12 @@
           LLVM_DEBUG(dbgs()
                      << "SLP: Need to swizzle PHINodes (terminator use).\n");
           BS.cancelScheduling(VL, VL0);
-          newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+          newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
           return;
         }
       }
 
-      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n");
 
       for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) {
@@ -2198,7 +2256,7 @@
       if (Reuse) {
         LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n");
         ++NumOpsWantToKeepOriginalOrder;
-        newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
+        newTreeEntry(VL, /*Vectorized=*/true, &S, UserTreeIdx,
                      ReuseShuffleIndicies);
         // This is a special case, as it does not gather, but at the same time
         // we are not extending buildTree_rec() towards the operands.
@@ -2220,7 +2278,7 @@
         auto StoredCurrentOrderAndNum =
             NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
         ++StoredCurrentOrderAndNum->getSecond();
-        newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies,
+        newTreeEntry(VL, /*Vectorized=*/true, &S, UserTreeIdx, ReuseShuffleIndicies,
                      StoredCurrentOrderAndNum->getFirst());
         // This is a special case, as it does not gather, but at the same time
         // we are not extending buildTree_rec() towards the operands.
@@ -2230,7 +2288,7 @@
         return;
       }
       LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n");
-      newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx, ReuseShuffleIndicies);
+      newTreeEntry(VL, /*Vectorized=*/false, &S, UserTreeIdx, ReuseShuffleIndicies);
       BS.cancelScheduling(VL, VL0);
       return;
     }
@@ -2246,7 +2304,7 @@
       if (DL->getTypeSizeInBits(ScalarTy) !=
           DL->getTypeAllocSizeInBits(ScalarTy)) {
         BS.cancelScheduling(VL, VL0);
-        newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+        newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
         LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
         return;
       }
@@ -2259,7 +2317,7 @@
         auto *L = cast<LoadInst>(V);
         if (!L->isSimple()) {
           BS.cancelScheduling(VL, VL0);
-          newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+          newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
           LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
           return;
         }
@@ -2289,14 +2347,14 @@
         if (CurrentOrder.empty()) {
           // Original loads are consecutive and does not require reordering.
           ++NumOpsWantToKeepOriginalOrder;
-          newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
+          newTreeEntry(VL, /*Vectorized=*/true, &S, UserTreeIdx,
                        ReuseShuffleIndicies);
           LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
         } else {
           // Need to reorder.
           auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first;
           ++I->getSecond();
-          newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx,
+          newTreeEntry(VL, /*Vectorized=*/true, &S, UserTreeIdx,
                        ReuseShuffleIndicies, I->getFirst());
           LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
         }
@@ -2306,7 +2364,7 @@
 
       LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
       BS.cancelScheduling(VL, VL0);
-      newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+      newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
       return;
     }
     case Instruction::ZExt:
@@ -2326,13 +2384,13 @@
         Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType();
         if (Ty != SrcTy || !isValidElementType(Ty)) {
           BS.cancelScheduling(VL, VL0);
-          newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+          newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
           LLVM_DEBUG(dbgs()
                      << "SLP: Gathering casts with different src types.\n");
           return;
         }
       }
-      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
 
       for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
@@ -2356,14 +2414,14 @@
         if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) ||
             Cmp->getOperand(0)->getType() != ComparedTy) {
           BS.cancelScheduling(VL, VL0);
-          newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+          newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
           LLVM_DEBUG(dbgs()
                      << "SLP: Gathering cmp with different predicate.\n");
           return;
         }
       }
 
-      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
 
       ValueList Left, Right;
@@ -2409,7 +2467,7 @@
     case Instruction::And:
     case Instruction::Or:
     case Instruction::Xor: {
-      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
 
       // Sort operands of the instructions so that each side is more likely to
@@ -2438,7 +2496,7 @@
         if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
           LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
           BS.cancelScheduling(VL, VL0);
-          newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+          newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
           return;
         }
       }
@@ -2452,7 +2510,7 @@
           LLVM_DEBUG(dbgs()
                      << "SLP: not-vectorizable GEP (different types).\n");
           BS.cancelScheduling(VL, VL0);
-          newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+          newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
           return;
         }
       }
@@ -2464,12 +2522,12 @@
           LLVM_DEBUG(dbgs()
                      << "SLP: not-vectorizable GEP (non-constant indexes).\n");
           BS.cancelScheduling(VL, VL0);
-          newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+          newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
           return;
         }
       }
 
-      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
       for (unsigned i = 0, e = 2; i < e; ++i) {
         ValueList Operands;
@@ -2486,12 +2544,12 @@
       for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
         if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
           BS.cancelScheduling(VL, VL0);
-          newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+          newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
           LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
           return;
         }
 
-      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
 
       ValueList Operands;
@@ -2509,7 +2567,7 @@
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
       if (!isTriviallyVectorizable(ID)) {
         BS.cancelScheduling(VL, VL0);
-        newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+        newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
         LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
         return;
       }
@@ -2525,7 +2583,7 @@
             getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
             !CI->hasIdenticalOperandBundleSchema(*CI2)) {
           BS.cancelScheduling(VL, VL0);
-          newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+          newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
           LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i]
                             << "\n");
           return;
@@ -2537,7 +2595,7 @@
           Value *A1J = CI2->getArgOperand(j);
           if (ScalarArgs[j] != A1J) {
             BS.cancelScheduling(VL, VL0);
-            newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+            newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
             LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
                               << " argument " << ScalarArgs[j] << "!=" << A1J
                               << "\n");
@@ -2551,14 +2609,14 @@
                         CI->op_begin() + CI->getBundleOperandsEndIndex(),
                         CI2->op_begin() + CI2->getBundleOperandsStartIndex())) {
           BS.cancelScheduling(VL, VL0);
-          newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+          newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
           LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:"
                             << *CI << "!=" << *VL[i] << '\n');
           return;
         }
       }
 
-      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies);
       for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) {
         ValueList Operands;
         // Prepare the operand vector.
@@ -2575,11 +2633,11 @@
       // then do not vectorize this instruction.
       if (!S.isAltShuffle()) {
         BS.cancelScheduling(VL, VL0);
-        newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+        newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
         LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
         return;
       }
-      auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies);
+      auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
 
       // Reorder operands if reordering would enable vectorization.
@@ -2603,7 +2661,7 @@
     }
     default:
       BS.cancelScheduling(VL, VL0);
-      newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies);
+      newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies);
       LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
       return;
   }
@@ -2738,7 +2796,7 @@
       return ReuseShuffleCost +
             TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
     }
-    if (getSameOpcode(VL).getOpcode() == Instruction::ExtractElement &&
+    if (E->getOpcode() == Instruction::ExtractElement &&
         allSameType(VL) && allSameBlock(VL)) {
       Optional<TargetTransformInfo::ShuffleKind> ShuffleKind = isShuffle(VL);
       if (ShuffleKind.hasValue()) {
@@ -2748,10 +2806,10 @@
           // instruction itself is not going to be vectorized, consider this
           // instruction as dead and remove its cost from the final cost of the
          // vectorized tree.
-          if (areAllUsersVectorized(cast<Instruction>(V)) &&
-              !ScalarToTreeEntry.count(V)) {
+          auto *I = cast<Instruction>(V);
+          if (areAllUsersVectorized(I) && !ScalarToTreeEntry.count(I)) {
            auto *IO = cast<ConstantInt>(
-                cast<ExtractElementInst>(V)->getIndexOperand());
+                cast<ExtractElementInst>(I)->getIndexOperand());
            Cost -= TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                            IO->getZExtValue());
          }
@@ -2761,11 +2819,10 @@
     }
     return ReuseShuffleCost + getGatherCost(VL);
   }
-  InstructionsState S = getSameOpcode(VL);
-  assert(S.getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
-  Instruction *VL0 = cast<Instruction>(S.OpValue);
-  unsigned ShuffleOrOp = S.isAltShuffle() ?
-               (unsigned) Instruction::ShuffleVector : S.getOpcode();
+  assert(E->getOpcode() && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
+  Instruction *VL0 = E->getMainOp();
+  unsigned ShuffleOrOp = E->isAltShuffle() ?
+               (unsigned) Instruction::ShuffleVector : E->getOpcode();
   switch (ShuffleOrOp) {
     case Instruction::PHI:
       return 0;
@@ -2851,7 +2908,7 @@
     case Instruction::BitCast: {
       Type *SrcTy = VL0->getOperand(0)->getType();
       int ScalarEltCost =
-          TTI->getCastInstrCost(S.getOpcode(), ScalarTy, SrcTy, VL0);
+          TTI->getCastInstrCost(E->getOpcode(), ScalarTy, SrcTy, VL0);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
@@ -2864,7 +2921,7 @@
       // Check if the values are candidates to demote.
       if (!MinBWs.count(VL0) || VecTy != SrcVecTy) {
         VecCost = ReuseShuffleCost +
-                  TTI->getCastInstrCost(S.getOpcode(), VecTy, SrcVecTy, VL0);
+                  TTI->getCastInstrCost(E->getOpcode(), VecTy, SrcVecTy, VL0);
       }
       return VecCost - ScalarCost;
     }
@@ -2872,14 +2929,14 @@
     case Instruction::FCmp:
     case Instruction::ICmp:
     case Instruction::Select: {
       // Calculate the cost of this instruction.
-      int ScalarEltCost = TTI->getCmpSelInstrCost(S.getOpcode(), ScalarTy,
+      int ScalarEltCost = TTI->getCmpSelInstrCost(E->getOpcode(), ScalarTy,
                                                   Builder.getInt1Ty(), VL0);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
       VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size());
       int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
-      int VecCost = TTI->getCmpSelInstrCost(S.getOpcode(), VecTy, MaskTy, VL0);
+      int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy, VL0);
       return ReuseShuffleCost + VecCost - ScalarCost;
     }
     case Instruction::FNeg:
@@ -2940,12 +2997,12 @@
       SmallVector<const Value *, 4> Operands(VL0->operand_values());
       int ScalarEltCost = TTI->getArithmeticInstrCost(
-          S.getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands);
+          E->getOpcode(), ScalarTy, Op1VK, Op2VK, Op1VP, Op2VP, Operands);
       if (NeedToShuffleReuses) {
         ReuseShuffleCost -= (ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       }
       int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
-      int VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy, Op1VK,
+      int VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, Op1VK,
                                                 Op2VK, Op1VP, Op2VP, Operands);
       return ReuseShuffleCost + VecCost - ScalarCost;
     }
@@ -3027,11 +3084,11 @@
       return ReuseShuffleCost + VecCallCost - ScalarCallCost;
     }
     case Instruction::ShuffleVector: {
-      assert(S.isAltShuffle() &&
-             ((Instruction::isBinaryOp(S.getOpcode()) &&
-               Instruction::isBinaryOp(S.getAltOpcode())) ||
-              (Instruction::isCast(S.getOpcode()) &&
-               Instruction::isCast(S.getAltOpcode()))) &&
+      assert(E->isAltShuffle() &&
+             ((Instruction::isBinaryOp(E->getOpcode()) &&
+               Instruction::isBinaryOp(E->getAltOpcode())) ||
+              (Instruction::isCast(E->getOpcode()) &&
+               Instruction::isCast(E->getAltOpcode()))) &&
             "Invalid Shuffle Vector Operand");
       int ScalarCost = 0;
       if (NeedToShuffleReuses) {
@@ -3048,23 +3105,23 @@
       }
       for (Value *i : VL) {
         Instruction *I = cast<Instruction>(i);
-        assert(S.isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
+        assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
         ScalarCost += TTI->getInstructionCost(
             I, TargetTransformInfo::TCK_RecipThroughput);
       }
       // VecCost is equal to sum of the cost of creating 2 vectors
       // and the cost of creating shuffle.
       int VecCost = 0;
-      if (Instruction::isBinaryOp(S.getOpcode())) {
-        VecCost = TTI->getArithmeticInstrCost(S.getOpcode(), VecTy);
-        VecCost += TTI->getArithmeticInstrCost(S.getAltOpcode(), VecTy);
+      if (Instruction::isBinaryOp(E->getOpcode())) {
+        VecCost = TTI->getArithmeticInstrCost(E->getOpcode(), VecTy);
+        VecCost += TTI->getArithmeticInstrCost(E->getAltOpcode(), VecTy);
       } else {
-        Type *Src0SclTy = S.MainOp->getOperand(0)->getType();
-        Type *Src1SclTy = S.AltOp->getOperand(0)->getType();
+        Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType();
+        Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType();
         VectorType *Src0Ty = VectorType::get(Src0SclTy, VL.size());
         VectorType *Src1Ty = VectorType::get(Src1SclTy, VL.size());
-        VecCost = TTI->getCastInstrCost(S.getOpcode(), VecTy, Src0Ty);
-        VecCost += TTI->getCastInstrCost(S.getAltOpcode(), VecTy, Src1Ty);
+        VecCost = TTI->getCastInstrCost(E->getOpcode(), VecTy, Src0Ty);
+        VecCost += TTI->getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty);
       }
       VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Select, VecTy, 0);
       return ReuseShuffleCost + VecCost - ScalarCost;
@@ -3319,16 +3376,16 @@
   Right = Ops.getVL(1);
 }
 
-void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL,
-                                        const InstructionsState &S) {
+void BoUpSLP::setInsertPointAfterBundle(TreeEntry *E) {
   // Get the basic block this bundle is in. All instructions in the bundle
   // should be in this block.
-  auto *Front = cast<Instruction>(S.OpValue);
+  auto *Front = E->getMainOp();
   auto *BB = Front->getParent();
-  assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool {
-    auto *I = cast<Instruction>(V);
-    return !S.isOpcodeOrAlt(I) || I->getParent() == BB;
-  }));
+  assert(llvm::all_of(make_range(E->Scalars.begin(), E->Scalars.end()),
+                      [=](Value *V) -> bool {
+                        auto *I = cast<Instruction>(V);
+                        return !E->isOpcodeOrAlt(I) || I->getParent() == BB;
+                      }));
 
   // The last instruction in the bundle in program order.
   Instruction *LastInst = nullptr;
@@ -3339,7 +3396,7 @@
   // bundle. The end of the bundle is marked by null ScheduleData.
   if (BlocksSchedules.count(BB)) {
     auto *Bundle =
-        BlocksSchedules[BB]->getScheduleData(isOneOf(S, VL.back()));
+        BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back()));
     if (Bundle && Bundle->isPartOfBundle())
       for (; Bundle; Bundle = Bundle->NextInBundle)
         if (Bundle->OpValue == Bundle->Inst)
@@ -3365,9 +3422,9 @@
   // we both exit early from buildTree_rec and that the bundle be out-of-order
   // (causing us to iterate all the way to the end of the block).
   if (!LastInst) {
-    SmallPtrSet<Value *, 16> Bundle(VL.begin(), VL.end());
+    SmallPtrSet<Value *, 16> Bundle(E->Scalars.begin(), E->Scalars.end());
     for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) {
-      if (Bundle.erase(&I) && S.isOpcodeOrAlt(&I))
+      if (Bundle.erase(&I) && E->isOpcodeOrAlt(&I))
         LastInst = &I;
       if (Bundle.empty())
         break;
@@ -3494,8 +3551,7 @@
     return E->VectorizedValue;
   }
 
-  InstructionsState S = getSameOpcode(E->Scalars);
-  Instruction *VL0 = cast<Instruction>(S.OpValue);
+  Instruction *VL0 = E->getMainOp();
   Type *ScalarTy = VL0->getType();
   if (StoreInst *SI = dyn_cast<StoreInst>(VL0))
     ScalarTy = SI->getValueOperand()->getType();
@@ -3504,7 +3560,7 @@
   bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
 
   if (E->NeedToGather) {
-    setInsertPointAfterBundle(E->Scalars, S);
+    setInsertPointAfterBundle(E);
     auto *V = Gather(E->Scalars, VecTy);
     if (NeedToShuffleReuses) {
       V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
@@ -3518,8 +3574,8 @@
     return V;
   }
 
-  unsigned ShuffleOrOp = S.isAltShuffle() ?
-          (unsigned) Instruction::ShuffleVector : S.getOpcode();
+  unsigned ShuffleOrOp = E->isAltShuffle() ?
+          (unsigned) Instruction::ShuffleVector : E->getOpcode();
   switch (ShuffleOrOp) {
     case Instruction::PHI: {
       PHINode *PH = dyn_cast<PHINode>(VL0);
@@ -3577,7 +3633,7 @@
         E->VectorizedValue = V;
         return V;
       }
-      setInsertPointAfterBundle(E->Scalars, S);
+      setInsertPointAfterBundle(E);
       auto *V = Gather(E->Scalars, VecTy);
       if (NeedToShuffleReuses) {
         V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
@@ -3612,7 +3668,7 @@
         E->VectorizedValue = NewV;
         return NewV;
       }
-      setInsertPointAfterBundle(E->Scalars, S);
+      setInsertPointAfterBundle(E);
       auto *V = Gather(E->Scalars, VecTy);
       if (NeedToShuffleReuses) {
         V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
@@ -3637,7 +3693,7 @@
     case Instruction::Trunc:
     case Instruction::FPTrunc:
     case Instruction::BitCast: {
-      setInsertPointAfterBundle(E->Scalars, S);
+      setInsertPointAfterBundle(E);
 
       Value *InVec = vectorizeTree(E->getOperand(0));
 
@@ -3658,7 +3714,7 @@
     case Instruction::FCmp:
     case Instruction::ICmp: {
-      setInsertPointAfterBundle(E->Scalars, S);
+      setInsertPointAfterBundle(E);
 
       Value *L = vectorizeTree(E->getOperand(0));
       Value *R = vectorizeTree(E->getOperand(1));
@@ -3670,7 +3726,7 @@
       CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate();
       Value *V;
-      if (S.getOpcode() == Instruction::FCmp)
+      if (E->getOpcode() == Instruction::FCmp)
         V = Builder.CreateFCmp(P0, L, R);
       else
         V = Builder.CreateICmp(P0, L, R);
@@ -3685,7 +3741,7 @@
       return V;
     }
     case Instruction::Select: {
-      setInsertPointAfterBundle(E->Scalars, S);
+      setInsertPointAfterBundle(E);
 
       Value *Cond = vectorizeTree(E->getOperand(0));
       Value *True = vectorizeTree(E->getOperand(1));
@@ -3706,7 +3762,7 @@
       return V;
     }
     case Instruction::FNeg: {
-      setInsertPointAfterBundle(E->Scalars, S);
+      setInsertPointAfterBundle(E);
 
       Value *Op = vectorizeTree(E->getOperand(0));
 
@@ -3716,7 +3772,7 @@
       }
 
       Value *V = Builder.CreateUnOp(
-          static_cast<Instruction::UnaryOps>(S.getOpcode()), Op);
+          static_cast<Instruction::UnaryOps>(E->getOpcode()), Op);
       propagateIRFlags(V, E->Scalars, VL0);
       if (auto *I = dyn_cast<Instruction>(V))
         V = propagateMetadata(I, E->Scalars);
@@ -3748,7 +3804,7 @@
     case Instruction::And:
     case Instruction::Or:
     case Instruction::Xor: {
-      setInsertPointAfterBundle(E->Scalars, S);
+      setInsertPointAfterBundle(E);
 
       Value *LHS = vectorizeTree(E->getOperand(0));
       Value *RHS = vectorizeTree(E->getOperand(1));
@@ -3759,7 +3815,8 @@
       }
 
       Value *V = Builder.CreateBinOp(
-          static_cast<Instruction::BinaryOps>(S.getOpcode()), LHS, RHS);
+          static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS,
+          RHS);
       propagateIRFlags(V, E->Scalars, VL0);
       if (auto *I = dyn_cast<Instruction>(V))
         V = propagateMetadata(I, E->Scalars);
@@ -3777,11 +3834,9 @@
       // Loads are inserted at the head of the tree because we don't want to
       // sink them all the way down past store instructions.
       bool IsReorder = !E->ReorderIndices.empty();
-      if (IsReorder) {
-        S = getSameOpcode(E->Scalars, E->ReorderIndices.front());
-        VL0 = cast<Instruction>(S.OpValue);
-      }
-      setInsertPointAfterBundle(E->Scalars, S);
+      if (E->updateStateIfReorder())
+        VL0 = E->getMainOp();
+      setInsertPointAfterBundle(E);
 
       LoadInst *LI = cast<LoadInst>(VL0);
       Type *ScalarLoadTy = LI->getType();
@@ -3824,7 +3879,7 @@
       unsigned Alignment = SI->getAlignment();
       unsigned AS = SI->getPointerAddressSpace();
 
-      setInsertPointAfterBundle(E->Scalars, S);
+      setInsertPointAfterBundle(E);
 
       Value *VecValue = vectorizeTree(E->getOperand(0));
       Value *ScalarPtr = SI->getPointerOperand();
@@ -3851,7 +3906,7 @@
       return V;
     }
     case Instruction::GetElementPtr: {
-      setInsertPointAfterBundle(E->Scalars, S);
+      setInsertPointAfterBundle(E);
 
       Value *Op0 = vectorizeTree(E->getOperand(0));
 
@@ -3878,7 +3933,7 @@
     }
     case Instruction::Call: {
       CallInst *CI = cast<CallInst>(VL0);
-      setInsertPointAfterBundle(E->Scalars, S);
+      setInsertPointAfterBundle(E);
 
       Intrinsic::ID IID = Intrinsic::not_intrinsic;
       if (Function *FI = CI->getCalledFunction())
@@ -3926,20 +3981,20 @@
       return V;
     }
     case Instruction::ShuffleVector: {
-      assert(S.isAltShuffle() &&
-             ((Instruction::isBinaryOp(S.getOpcode()) &&
-               Instruction::isBinaryOp(S.getAltOpcode())) ||
-              (Instruction::isCast(S.getOpcode()) &&
-               Instruction::isCast(S.getAltOpcode()))) &&
+      assert(E->isAltShuffle() &&
+             ((Instruction::isBinaryOp(E->getOpcode()) &&
+               Instruction::isBinaryOp(E->getAltOpcode())) ||
+              (Instruction::isCast(E->getOpcode()) &&
+               Instruction::isCast(E->getAltOpcode()))) &&
             "Invalid Shuffle Vector Operand");
 
       Value *LHS = nullptr, *RHS = nullptr;
-      if (Instruction::isBinaryOp(S.getOpcode())) {
-        setInsertPointAfterBundle(E->Scalars, S);
+      if (Instruction::isBinaryOp(E->getOpcode())) {
+        setInsertPointAfterBundle(E);
         LHS = vectorizeTree(E->getOperand(0));
         RHS = vectorizeTree(E->getOperand(1));
       } else {
-        setInsertPointAfterBundle(E->Scalars, S);
+        setInsertPointAfterBundle(E);
         LHS = vectorizeTree(E->getOperand(0));
       }
@@ -3949,16 +4004,16 @@
       }
 
       Value *V0, *V1;
-      if (Instruction::isBinaryOp(S.getOpcode())) {
+      if (Instruction::isBinaryOp(E->getOpcode())) {
         V0 = Builder.CreateBinOp(
-            static_cast<Instruction::BinaryOps>(S.getOpcode()), LHS, RHS);
+            static_cast<Instruction::BinaryOps>(E->getOpcode()), LHS, RHS);
         V1 = Builder.CreateBinOp(
-            static_cast<Instruction::BinaryOps>(S.getAltOpcode()), LHS, RHS);
+            static_cast<Instruction::BinaryOps>(E->getAltOpcode()), LHS, RHS);
       } else {
         V0 = Builder.CreateCast(
-            static_cast<Instruction::CastOps>(S.getOpcode()), LHS, VecTy);
+            static_cast<Instruction::CastOps>(E->getOpcode()), LHS, VecTy);
         V1 = Builder.CreateCast(
-            static_cast<Instruction::CastOps>(S.getAltOpcode()), LHS, VecTy);
+            static_cast<Instruction::CastOps>(E->getAltOpcode()), LHS, VecTy);
       }
 
       // Create shuffle to take alternate operations from the vector.
@@ -3969,8 +4024,8 @@
       SmallVector<Constant *, 8> Mask(e);
       for (unsigned i = 0; i < e; ++i) {
         auto *OpInst = cast<Instruction>(E->Scalars[i]);
-        assert(S.isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
-        if (OpInst->getOpcode() == S.getAltOpcode()) {
+        assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode");
+        if (OpInst->getOpcode() == E->getAltOpcode()) {
           Mask[i] = Builder.getInt32(e + i);
           AltScalars.push_back(E->Scalars[i]);
         } else {