Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1352,8 +1352,7 @@ /// Set the Builder insert point to one after the last instruction in /// the bundle - void setInsertPointAfterBundle(ArrayRef<Value *> VL, - const InstructionsState &S); + void setInsertPointAfterBundle(TreeEntry *E); /// \returns a vector from a collection of scalars in \p VL. Value *Gather(ArrayRef<Value *> VL, VectorType *Ty); @@ -1390,6 +1389,10 @@ /// The Scalars are vectorized into this value. It is initialized to Null. Value *VectorizedValue = nullptr; + /// The main/alternate instruction. + Instruction *MainOp = nullptr; + Instruction *AltOp = nullptr; + /// Do we need to gather this sequence ? bool NeedToGather = false; @@ -1456,6 +1459,24 @@ return Operands[OpIdx][0]; } + /// Some of the instructions in the list have alternate opcodes. + bool isAltShuffle() const { return MainOp->getOpcode() != AltOp->getOpcode(); } + + bool isOpcodeOrAlt(Instruction *I) const { unsigned CheckedOpcode = I->getOpcode(); return (MainOp->getOpcode() == CheckedOpcode || AltOp->getOpcode() == CheckedOpcode); } + + /// Chooses the correct key for scheduling data. If \p Op has the same (or + /// alternate) opcode as \p OpValue, the key is \p Op. Otherwise the key is \p + /// OpValue. + Value *isOneOf(Value *Op) { auto *I = dyn_cast<Instruction>(Op); if (I && isOpcodeOrAlt(I)) return Op; return MainOp; } + #ifndef NDEBUG /// Debug printer. LLVM_DUMP_METHOD void dump() const { @@ -1496,7 +1517,7 @@ /// Create a new VectorizableTree entry.
TreeEntry *newTreeEntry(ArrayRef<Value *> VL, bool Vectorized, - const EdgeInfo &UserTreeIdx, + InstructionsState *S, const EdgeInfo &UserTreeIdx, ArrayRef<unsigned> ReuseShuffleIndices = None, ArrayRef<unsigned> ReorderIndices = None) { VectorizableTree.push_back(llvm::make_unique<TreeEntry>(VectorizableTree)); @@ -1507,10 +1528,12 @@ Last->ReuseShuffleIndices.append(ReuseShuffleIndices.begin(), ReuseShuffleIndices.end()); Last->ReorderIndices = ReorderIndices; + Last->MainOp = S->MainOp; + Last->AltOp = S->AltOp; if (Vectorized) { for (int i = 0, e = VL.size(); i != e; ++i) { assert(!getTreeEntry(VL[i]) && "Scalar already in tree!"); - ScalarToTreeEntry[VL[i]] = Last->Idx; + ScalarToTreeEntry[cast<Instruction>(VL[i])] = Last; } } else { MustGather.insert(VL.begin(), VL.end()); @@ -1538,21 +1561,23 @@ #endif TreeEntry *getTreeEntry(Value *V) { - auto I = ScalarToTreeEntry.find(V); - if (I != ScalarToTreeEntry.end()) - return VectorizableTree[I->second].get(); + auto *I = dyn_cast<Instruction>(V); + auto It = ScalarToTreeEntry.find(I); + if (It != ScalarToTreeEntry.end()) + return It->second; return nullptr; } const TreeEntry *getTreeEntry(Value *V) const { - auto I = ScalarToTreeEntry.find(V); - if (It != ScalarToTreeEntry.end()) - return VectorizableTree[I->second].get(); + auto *I = dyn_cast<Instruction>(V); + auto It = ScalarToTreeEntry.find(I); + if (It != ScalarToTreeEntry.end()) + return It->second; return nullptr; } /// Maps a specific scalar to its tree entry. - SmallDenseMap<Value *, int> ScalarToTreeEntry; + SmallDenseMap<Instruction *, TreeEntry *> ScalarToTreeEntry; /// A list of scalars that we found that we need to keep as scalars. ValueSet MustGather; @@ -2226,28 +2251,28 @@ InstructionsState S = getSameOpcode(VL); if (Depth == RecursionMaxDepth) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, &S, UserTreeIdx); return; } // Don't handle vectors.
if (S.OpValue->getType()->isVectorTy()) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, &S, UserTreeIdx); return; } if (StoreInst *SI = dyn_cast<StoreInst>(S.OpValue)) if (SI->getValueOperand()->getType()->isVectorTy()) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, &S, UserTreeIdx); return; } // If all of the operands are identical or constant we have a simple solution. if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.getOpcode()) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, &S, UserTreeIdx); return; } @@ -2259,7 +2284,7 @@ if (EphValues.count(VL[i])) { LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << ") is ephemeral.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, &S, UserTreeIdx); return; } } @@ -2269,7 +2294,7 @@ LLVM_DEBUG(dbgs() << "SLP: \tChecking bundle: " << *S.OpValue << ".\n"); if (!E->isSame(VL)) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, &S, UserTreeIdx); return; } // Record the reuse of the tree node. FIXME, currently this is only used to @@ -2289,7 +2314,7 @@ if (getTreeEntry(I)) { LLVM_DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << ") is already in tree.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, &S, UserTreeIdx); return; } } @@ -2300,7 +2325,7 @@ for (unsigned i = 0, e = VL.size(); i != e; ++i) { if (MustGather.count(VL[i]) || is_contained(UserIgnoreList, VL[i])) { LLVM_DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, &S, UserTreeIdx); return; } } @@ -2314,7 +2339,7 @@ // Don't go into unreachable blocks.
They may contain instructions with // dependency cycles which confuse the final scheduling. LLVM_DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, &S, UserTreeIdx); return; } @@ -2334,7 +2359,7 @@ LLVM_DEBUG(dbgs() << "SLP: Shuffle for reused scalars.\n"); if (UniqueValues.size() <= 1 || !llvm::isPowerOf2_32(UniqueValues.size())) { LLVM_DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, &S, UserTreeIdx); return; } VL = UniqueValues; @@ -2351,7 +2376,7 @@ assert((!BS.getScheduleData(VL0) || !BS.getScheduleData(VL0)->isPartOfBundle()) && "tryScheduleBundle should cancelScheduling on failure"); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); return; } LLVM_DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); @@ -2372,12 +2397,12 @@ LLVM_DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (terminator use).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { @@ -2398,7 +2423,7 @@ if (Reuse) { LLVM_DEBUG(dbgs() << "SLP: Reusing or shuffling extract sequence.\n"); ++NumOpsWantToKeepOriginalOrder; - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, + newTreeEntry(VL, /*Vectorized=*/true, &S, UserTreeIdx, ReuseShuffleIndicies); // This is a special case, as it does not gather, but at the same time // we are not extending buildTree_rec() towards the operands. 
@@ -2420,7 +2445,7 @@ auto StoredCurrentOrderAndNum = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first; ++StoredCurrentOrderAndNum->getSecond(); - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, ReuseShuffleIndicies, + newTreeEntry(VL, /*Vectorized=*/true, &S, UserTreeIdx, ReuseShuffleIndicies, StoredCurrentOrderAndNum->getFirst()); // This is a special case, as it does not gather, but at the same time // we are not extending buildTree_rec() towards the operands. @@ -2430,7 +2455,7 @@ return; } LLVM_DEBUG(dbgs() << "SLP: Gather extract sequence.\n"); - newTreeEntry(VL, /*Vectorized=*/false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, /*Vectorized=*/false, &S, UserTreeIdx, ReuseShuffleIndicies); BS.cancelScheduling(VL, VL0); return; } @@ -2446,7 +2471,7 @@ if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); return; } @@ -2459,7 +2484,7 @@ auto *L = cast<LoadInst>(V); if (!L->isSimple()) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); return; } @@ -2489,14 +2514,14 @@ if (CurrentOrder.empty()) { // Original loads are consecutive and does not require reordering. ++NumOpsWantToKeepOriginalOrder; - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, + newTreeEntry(VL, /*Vectorized=*/true, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n"); } else { // Need to reorder.
auto I = NumOpsWantToKeepOrder.try_emplace(CurrentOrder).first; ++I->getSecond(); - newTreeEntry(VL, /*Vectorized=*/true, UserTreeIdx, + newTreeEntry(VL, /*Vectorized=*/true, &S, UserTreeIdx, ReuseShuffleIndicies, I->getFirst()); LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n"); } @@ -2506,7 +2531,7 @@ LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); return; } case Instruction::ZExt: @@ -2526,13 +2551,13 @@ Type *Ty = cast<Instruction>(VL[i])->getOperand(0)->getType(); if (Ty != SrcTy || !isValidElementType(Ty)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n"); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { @@ -2556,14 +2581,14 @@ if ((Cmp->getPredicate() != P0 && Cmp->getPredicate() != SwapP0) || Cmp->getOperand(0)->getType() != ComparedTy) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n"); ValueList Left, Right; @@ -2609,7 +2634,7 @@ case Instruction::And: case Instruction::Or: case Instruction::Xor: { - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE =
newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n"); // Sort operands of the instructions so that each side is more likely to @@ -2638,7 +2663,7 @@ if (cast<Instruction>(VL[j])->getNumOperands() != 2) { LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); return; } } @@ -2652,7 +2677,7 @@ LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); return; } } @@ -2664,12 +2689,12 @@ LLVM_DEBUG(dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of GEPs.\n"); for (unsigned i = 0, e = 2; i < e; ++i) { ValueList Operands; @@ -2686,12 +2711,12 @@ for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); return; } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n"); ValueList Operands; @@ -2709,7 +2734,7 @@ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (!isTriviallyVectorizable(ID)) {
BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; } @@ -2725,7 +2750,7 @@ getVectorIntrinsicIDForCall(CI2, TLI) != ID || !CI->hasIdenticalOperandBundleSchema(*CI2)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] << "\n"); return; @@ -2737,7 +2762,7 @@ Value *A1J = CI2->getArgOperand(j); if (ScalarArgs[j] != A1J) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI << " argument " << ScalarArgs[j] << "!=" << A1J << "\n"); @@ -2751,14 +2776,14 @@ CI->op_begin() + CI->getBundleOperandsEndIndex(), CI2->op_begin() + CI2->getBundleOperandsStartIndex())) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!=" << *VL[i] << '\n'); return; } } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies); for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) { ValueList Operands; // Prepare the operand vector. @@ -2775,11 +2800,11 @@ // then do not vectorize this instruction. 
if (!S.isAltShuffle()) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return; } - auto *TE = newTreeEntry(VL, true, UserTreeIdx, ReuseShuffleIndicies); + auto *TE = newTreeEntry(VL, true, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); // Reorder operands if reordering would enable vectorization. @@ -2803,7 +2828,7 @@ } default: BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx, ReuseShuffleIndicies); + newTreeEntry(VL, false, &S, UserTreeIdx, ReuseShuffleIndicies); LLVM_DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); return; } @@ -2904,7 +2929,7 @@ bool BoUpSLP::areAllUsersVectorized(Instruction *I) const { return I->hasOneUse() || std::all_of(I->user_begin(), I->user_end(), [this](User *U) { - return ScalarToTreeEntry.count(U) > 0; + return ScalarToTreeEntry.count(cast<Instruction>(U)) > 0; }); } @@ -2948,10 +2973,10 @@ // instruction itself is not going to be vectorized, consider this // instruction as dead and remove its cost from the final cost of the // vectorized tree. - if (areAllUsersVectorized(cast<Instruction>(V)) && - !ScalarToTreeEntry.count(V)) { + auto *I = cast<Instruction>(V); + if (areAllUsersVectorized(I) && !ScalarToTreeEntry.count(I)) { auto *IO = cast<ConstantInt>( - cast<ExtractElementInst>(V)->getIndexOperand()); + cast<ExtractElementInst>(I)->getIndexOperand()); Cost -= TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, IO->getZExtValue()); } @@ -3517,16 +3542,16 @@ Right = Ops.getVL(1); } -void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL, - const InstructionsState &S) { +void BoUpSLP::setInsertPointAfterBundle(TreeEntry *E) { // Get the basic block this bundle is in. All instructions in the bundle // should be in this block.
- auto *Front = cast<Instruction>(S.OpValue); + auto *Front = E->MainOp; auto *BB = Front->getParent(); - assert(llvm::all_of(make_range(VL.begin(), VL.end()), [=](Value *V) -> bool { - auto *I = cast<Instruction>(V); - return !S.isOpcodeOrAlt(I) || I->getParent() == BB; - })); + assert(llvm::all_of(make_range(E->Scalars.begin(), E->Scalars.end()), + [=](Value *V) -> bool { + auto *I = cast<Instruction>(V); + return !E->isOpcodeOrAlt(I) || I->getParent() == BB; + })); // The last instruction in the bundle in program order. Instruction *LastInst = nullptr; @@ -3537,7 +3562,7 @@ // bundle. The end of the bundle is marked by null ScheduleData. if (BlocksSchedules.count(BB)) { auto *Bundle = - BlocksSchedules[BB]->getScheduleData(isOneOf(S, VL.back())); + BlocksSchedules[BB]->getScheduleData(E->isOneOf(E->Scalars.back())); if (Bundle && Bundle->isPartOfBundle()) for (; Bundle; Bundle = Bundle->NextInBundle) if (Bundle->OpValue == Bundle->Inst) @@ -3563,9 +3588,9 @@ // we both exit early from buildTree_rec and that the bundle be out-of-order // (causing us to iterate all the way to the end of the block).
if (!LastInst) { - SmallPtrSet<Value *, 16> Bundle(VL.begin(), VL.end()); + SmallPtrSet<Value *, 16> Bundle(E->Scalars.begin(), E->Scalars.end()); for (auto &I : make_range(BasicBlock::iterator(Front), BB->end())) { - if (Bundle.erase(&I) && S.isOpcodeOrAlt(&I)) + if (Bundle.erase(&I) && E->isOpcodeOrAlt(&I)) LastInst = &I; if (Bundle.empty()) break; } @@ -3692,8 +3717,7 @@ return E->VectorizedValue; } - InstructionsState S = getSameOpcode(E->Scalars); - Instruction *VL0 = cast<Instruction>(S.OpValue); + Instruction *VL0 = E->MainOp; Type *ScalarTy = VL0->getType(); if (StoreInst *SI = dyn_cast<StoreInst>(VL0)) ScalarTy = SI->getValueOperand()->getType(); @@ -3702,7 +3726,7 @@ bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty(); if (E->NeedToGather) { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3716,8 +3740,8 @@ return V; } - unsigned ShuffleOrOp = S.isAltShuffle() ? - (unsigned) Instruction::ShuffleVector : S.getOpcode(); + unsigned ShuffleOrOp = E->isAltShuffle() ?
+ (unsigned) Instruction::ShuffleVector : E->MainOp->getOpcode(); switch (ShuffleOrOp) { case Instruction::PHI: { PHINode *PH = dyn_cast<PHINode>(VL0); @@ -3775,7 +3799,7 @@ E->VectorizedValue = V; return V; } - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3810,7 +3834,7 @@ E->VectorizedValue = NewV; return NewV; } - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); auto *V = Gather(E->Scalars, VecTy); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), @@ -3835,7 +3859,7 @@ case Instruction::Trunc: case Instruction::FPTrunc: case Instruction::BitCast: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *InVec = vectorizeTree(E->getOperand(0)); @@ -3856,7 +3880,7 @@ } case Instruction::FCmp: case Instruction::ICmp: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *L = vectorizeTree(E->getOperand(0)); Value *R = vectorizeTree(E->getOperand(1)); @@ -3868,7 +3892,7 @@ CmpInst::Predicate P0 = cast<CmpInst>(VL0)->getPredicate(); Value *V; - if (S.getOpcode() == Instruction::FCmp) + if (E->MainOp->getOpcode() == Instruction::FCmp) V = Builder.CreateFCmp(P0, L, R); else V = Builder.CreateICmp(P0, L, R); @@ -3883,7 +3907,7 @@ return V; } case Instruction::Select: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *Cond = vectorizeTree(E->getOperand(0)); Value *True = vectorizeTree(E->getOperand(1)); @@ -3904,7 +3928,7 @@ return V; } case Instruction::FNeg: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *Op = vectorizeTree(E->getOperand(0)); @@ -3914,7 +3938,7 @@ } Value *V = Builder.CreateUnOp( - static_cast<Instruction::UnaryOps>(S.getOpcode()), Op); + static_cast<Instruction::UnaryOps>(E->MainOp->getOpcode()), Op); propagateIRFlags(V, E->Scalars, VL0); if (auto *I = dyn_cast<Instruction>(V)) V =
propagateMetadata(I, E->Scalars); @@ -3946,7 +3970,7 @@ case Instruction::And: case Instruction::Or: case Instruction::Xor: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *LHS = vectorizeTree(E->getOperand(0)); Value *RHS = vectorizeTree(E->getOperand(1)); @@ -3957,7 +3981,8 @@ } Value *V = Builder.CreateBinOp( - static_cast<Instruction::BinaryOps>(S.getOpcode()), LHS, RHS); + static_cast<Instruction::BinaryOps>(E->MainOp->getOpcode()), LHS, + RHS); propagateIRFlags(V, E->Scalars, VL0); if (auto *I = dyn_cast<Instruction>(V)) V = propagateMetadata(I, E->Scalars); @@ -3976,10 +4001,13 @@ // sink them all the way down past store instructions. bool IsReorder = !E->ReorderIndices.empty(); if (IsReorder) { - S = getSameOpcode(E->Scalars, E->ReorderIndices.front()); - VL0 = cast<Instruction>(S.OpValue); + InstructionsState S = + getSameOpcode(E->Scalars, E->ReorderIndices.front()); + E->MainOp = S.MainOp; + E->AltOp = S.AltOp; + VL0 = S.MainOp; } - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); LoadInst *LI = cast<LoadInst>(VL0); Type *ScalarLoadTy = LI->getType(); @@ -4022,7 +4050,7 @@ unsigned Alignment = SI->getAlignment(); unsigned AS = SI->getPointerAddressSpace(); - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *VecValue = vectorizeTree(E->getOperand(0)); Value *ScalarPtr = SI->getPointerOperand(); @@ -4049,7 +4077,7 @@ return V; } case Instruction::GetElementPtr: { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Value *Op0 = vectorizeTree(E->getOperand(0)); @@ -4076,7 +4104,7 @@ } case Instruction::Call: { CallInst *CI = cast<CallInst>(VL0); - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); Function *FI; Intrinsic::ID IID = Intrinsic::not_intrinsic; Value *ScalarArg = nullptr; @@ -4124,20 +4152,20 @@ return V; } case Instruction::ShuffleVector: { - assert(S.isAltShuffle() && - ((Instruction::isBinaryOp(S.getOpcode()) && - Instruction::isBinaryOp(S.getAltOpcode())) || - (Instruction::isCast(S.getOpcode()) &&
- Instruction::isCast(S.getAltOpcode()))) && + assert(E->isAltShuffle() && + ((Instruction::isBinaryOp(E->MainOp->getOpcode()) && + Instruction::isBinaryOp(E->AltOp->getOpcode())) || + (Instruction::isCast(E->MainOp->getOpcode()) && + Instruction::isCast(E->AltOp->getOpcode()))) && "Invalid Shuffle Vector Operand"); Value *LHS, *RHS; - if (Instruction::isBinaryOp(S.getOpcode())) { - setInsertPointAfterBundle(E->Scalars, S); + if (Instruction::isBinaryOp(E->MainOp->getOpcode())) { + setInsertPointAfterBundle(E); LHS = vectorizeTree(E->getOperand(0)); RHS = vectorizeTree(E->getOperand(1)); } else { - setInsertPointAfterBundle(E->Scalars, S); + setInsertPointAfterBundle(E); LHS = vectorizeTree(E->getOperand(0)); } @@ -4147,16 +4175,20 @@ } Value *V0, *V1; - if (Instruction::isBinaryOp(S.getOpcode())) { + if (Instruction::isBinaryOp(E->MainOp->getOpcode())) { V0 = Builder.CreateBinOp( - static_cast<Instruction::BinaryOps>(S.getOpcode()), LHS, RHS); + static_cast<Instruction::BinaryOps>(E->MainOp->getOpcode()), LHS, + RHS); V1 = Builder.CreateBinOp( - static_cast<Instruction::BinaryOps>(S.getAltOpcode()), LHS, RHS); + static_cast<Instruction::BinaryOps>(E->AltOp->getOpcode()), LHS, + RHS); } else { V0 = Builder.CreateCast( - static_cast<Instruction::CastOps>(S.getOpcode()), LHS, VecTy); + static_cast<Instruction::CastOps>(E->MainOp->getOpcode()), LHS, + VecTy); V1 = Builder.CreateCast( - static_cast<Instruction::CastOps>(S.getAltOpcode()), LHS, VecTy); + static_cast<Instruction::CastOps>(E->AltOp->getOpcode()), LHS, + VecTy); } // Create shuffle to take alternate operations from the vector. @@ -4167,8 +4199,8 @@ SmallVector<Constant *, 8> Mask(e); for (unsigned i = 0; i < e; ++i) { auto *OpInst = cast<Instruction>(E->Scalars[i]); - assert(S.isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode"); - if (OpInst->getOpcode() == S.getAltOpcode()) { + assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode"); + if (OpInst->getOpcode() == E->AltOp->getOpcode()) { Mask[i] = Builder.getInt32(e + i); AltScalars.push_back(E->Scalars[i]); } else {