Index: lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- lib/Transforms/Vectorize/SLPVectorizer.cpp +++ lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -288,7 +288,6 @@ } } -/// true if the \p Value is odd, false otherwise. static bool isOdd(unsigned Value) { return Value & 1; } @@ -313,45 +312,75 @@ return OpValue; } -///\returns bool representing if Opcode \p Op can be part -/// of an alternate sequence which can later be merged as -/// a ShuffleVector instruction. -static bool canCombineAsAltInst(unsigned Op) { - return Op == Instruction::FAdd || Op == Instruction::FSub || - Op == Instruction::Sub || Op == Instruction::Add; -} +namespace { +/// Contains data for the instructions going to be vectorized. +struct RawInstructionsData { + /// Main Opcode of the instructions going to be vectorized. + unsigned Opcode = 0; + /// The list of instructions have some instructions with alternate opcodes. + bool HasAltOpcodes = false; +}; +} // namespace -/// \returns ShuffleVector instruction if instructions in \p VL have -/// alternate fadd,fsub / fsub,fadd/add,sub/sub,add sequence. -/// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...) -static unsigned isAltInst(ArrayRef VL) { - Instruction *I0 = dyn_cast(VL[0]); +/// Checks the list of the vectorized instructions \p VL and returns info about +/// this list. +static RawInstructionsData getMainOpcode(ArrayRef VL) { + auto *I0 = dyn_cast(VL[0]); + if (!I0) + return {}; + RawInstructionsData Res; unsigned Opcode = I0->getOpcode(); - unsigned AltOpcode = getAltOpcode(Opcode); - for (int i = 1, e = VL.size(); i < e; i++) { - Instruction *I = dyn_cast(VL[i]); - if (!I || I->getOpcode() != (isOdd(i) ? AltOpcode : Opcode)) - return 0; + // Walk through the list of the vectorized instructions + // in order to check its structure described by RawInstructionsData. + for (unsigned Cnt = 0, E = VL.size(); Cnt != E; ++Cnt) { + auto *I = dyn_cast(VL[Cnt]); + if (!I) + return {}; + if (Opcode != I->getOpcode()) + Res.HasAltOpcodes = true; } - return Instruction::ShuffleVector; + Res.Opcode = Opcode; + return Res; } -/// \returns The opcode if all of the Instructions in \p VL have the same -/// opcode, or zero. -static unsigned getSameOpcode(ArrayRef VL) { - Instruction *I0 = dyn_cast(VL[0]); - if (!I0) - return 0; - unsigned Opcode = I0->getOpcode(); - for (int i = 1, e = VL.size(); i < e; i++) { - Instruction *I = dyn_cast(VL[i]); - if (!I || Opcode != I->getOpcode()) { - if (canCombineAsAltInst(Opcode) && i == 1) - return isAltInst(VL); - return 0; +namespace { +/// Main data required for vectorization of instructions. +struct InstructionsState { + /// The very first instruction in the list with the main opcode. + Value *OpValue = nullptr; + /// The main opcode for the list of instructions. + unsigned Opcode = 0; + /// Some of the instructions in the list have alternate opcodes. + bool IsAltShuffle = false; + InstructionsState() = default; + InstructionsState(Value *OpValue, unsigned Opcode, bool IsAltShuffle) + : OpValue(OpValue), Opcode(Opcode), IsAltShuffle(IsAltShuffle) {} +}; +} // namespace + +/// \returns analysis of the Instructions in \p VL described in +/// InstructionsState, the Opcode that we suppose the whole list +/// could be vectorized even if its structure is diverse. +static InstructionsState getSameOpcode(ArrayRef VL) { + auto Res = getMainOpcode(VL); + unsigned Opcode = Res.Opcode; + if (!Res.HasAltOpcodes) + return InstructionsState(VL[0], Opcode, false); + auto *OpInst = cast(VL[0]); + unsigned AltOpcode = getAltOpcode(Opcode); + // Examine each element in the list instructions VL to determine + // if some operations there could be considered as an alternative + // (for example as subtraction relates to addition operation). + for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) { + auto *I = cast(VL[Cnt]); + unsigned InstOpcode = I->getOpcode(); + if ((Res.HasAltOpcodes && + InstOpcode != (isOdd(Cnt) ? AltOpcode : Opcode)) || + (!Res.HasAltOpcodes && InstOpcode != Opcode)) { + return InstructionsState(OpInst, 0, false); } } - return Opcode; + return InstructionsState(OpInst, Opcode, Res.HasAltOpcodes); } /// \returns true if all of the values in \p VL have the same type or false @@ -635,22 +664,31 @@ /// The TreeEntry index containing the user of this entry. We can actually /// have multiple users so the data structure is not truly a tree. SmallVector UserTreeIndices; + + /// Info about instruction in this tree entry. + InstructionsState State; }; /// Create a new VectorizableTree entry. TreeEntry *newTreeEntry(ArrayRef VL, bool Vectorized, - int &UserTreeIdx) { + int &UserTreeIdx, const InstructionsState &S) { + assert((!Vectorized || S.Opcode != 0) && + "Vectorized TreeEntry without opcode"); VectorizableTree.emplace_back(VectorizableTree); int idx = VectorizableTree.size() - 1; TreeEntry *Last = &VectorizableTree[idx]; Last->Scalars.insert(Last->Scalars.begin(), VL.begin(), VL.end()); Last->NeedToGather = !Vectorized; if (Vectorized) { + Last->State = S; for (int i = 0, e = VL.size(); i != e; ++i) { assert(!getTreeEntry(VL[i]) && "Scalar already in tree!"); ScalarToTreeEntry[VL[i]] = idx; } } else { + Last->State.Opcode = 0; + Last->State.OpValue = VL[0]; + Last->State.IsAltShuffle = false; MustGather.insert(VL.begin(), VL.end()); } @@ -1304,43 +1342,33 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, int UserTreeIdx) { - bool isAltShuffle = false; assert((allConstant(VL) || allSameType(VL)) && "Invalid types!"); + InstructionsState S = getSameOpcode(VL); if (Depth == RecursionMaxDepth) { DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } // Don't handle vectors. - if (VL[0]->getType()->isVectorTy()) { + if (S.OpValue->getType()->isVectorTy()) { DEBUG(dbgs() << "SLP: Gathering due to vector type.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } - if (StoreInst *SI = dyn_cast(VL[0])) + if (StoreInst *SI = dyn_cast(S.OpValue)) if (SI->getValueOperand()->getType()->isVectorTy()) { DEBUG(dbgs() << "SLP: Gathering due to store vector type.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } - unsigned Opcode = getSameOpcode(VL); - - // Check that this shuffle vector refers to the alternate - // sequence of opcodes. - if (Opcode == Instruction::ShuffleVector) { - Instruction *I0 = dyn_cast(VL[0]); - unsigned Op = I0->getOpcode(); - if (Op != Instruction::ShuffleVector) - isAltShuffle = true; - } // If all of the operands are identical or constant we have a simple solution. - if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !Opcode) { + if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !S.Opcode) { DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } @@ -1352,34 +1380,37 @@ if (EphValues.count(VL[i])) { DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << ") is ephemeral.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } } // Check if this is a duplicate of another entry. - if (TreeEntry *E = getTreeEntry(VL[0])) { + if (TreeEntry *E = getTreeEntry(S.OpValue)) { for (unsigned i = 0, e = VL.size(); i != e; ++i) { DEBUG(dbgs() << "SLP: \tChecking bundle: " << *VL[i] << ".\n"); if (E->Scalars[i] != VL[i]) { DEBUG(dbgs() << "SLP: Gathering due to partial overlap.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } } // Record the reuse of the tree node. FIXME, currently this is only used to // properly draw the graph rather than for the actual vectorization. E->UserTreeIndices.push_back(UserTreeIdx); - DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *VL[0] << ".\n"); + DEBUG(dbgs() << "SLP: Perfect diamond merge at " << *S.OpValue << ".\n"); return; } // Check that none of the instructions in the bundle are already in the tree. for (unsigned i = 0, e = VL.size(); i != e; ++i) { - if (ScalarToTreeEntry.count(VL[i])) { + auto *I = dyn_cast(VL[i]); + if (!I) + continue; + if (getTreeEntry(I)) { DEBUG(dbgs() << "SLP: The instruction (" << *VL[i] << ") is already in tree.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } } @@ -1389,21 +1420,21 @@ for (unsigned i = 0, e = VL.size(); i != e; ++i) { if (MustGather.count(VL[i])) { DEBUG(dbgs() << "SLP: Gathering due to gathered scalar.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } } // Check that all of the users of the scalars that we want to vectorize are // schedulable. - Instruction *VL0 = cast(VL[0]); + Instruction *VL0 = cast(S.OpValue); BasicBlock *BB = VL0->getParent(); if (!DT->isReachableFromEntry(BB)) { // Don't go into unreachable blocks. They may contain instructions with // dependency cycles which confuse the final scheduling. DEBUG(dbgs() << "SLP: bundle in unreachable block.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } @@ -1412,7 +1443,7 @@ for (unsigned j = i+1; j < e; ++j) if (VL[i] == VL[j]) { DEBUG(dbgs() << "SLP: Scalar used twice in bundle.\n"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } @@ -1422,17 +1453,19 @@ } BlockScheduling &BS = *BSRef.get(); - if (!BS.tryScheduleBundle(VL, this, VL0)) { + if (!BS.tryScheduleBundle(VL, this, S.OpValue)) { DEBUG(dbgs() << "SLP: We are not able to schedule this bundle!\n"); assert((!BS.getScheduleData(VL0) || !BS.getScheduleData(VL0)->isPartOfBundle()) && "tryScheduleBundle should cancelScheduling on failure"); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n"); - switch (Opcode) { + unsigned ShuffleOrOp = S.IsAltShuffle ? + (unsigned) Instruction::ShuffleVector : S.Opcode; + switch (ShuffleOrOp) { case Instruction::PHI: { PHINode *PH = dyn_cast(VL0); @@ -1444,12 +1477,12 @@ if (Term) { DEBUG(dbgs() << "SLP: Need to swizzle PHINodes (TerminatorInst use).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } } - newTreeEntry(VL, true, UserTreeIdx); + newTreeEntry(VL, true, UserTreeIdx, S); DEBUG(dbgs() << "SLP: added a vector of PHINodes.\n"); for (unsigned i = 0, e = PH->getNumIncomingValues(); i < e; ++i) { @@ -1471,7 +1504,7 @@ } else { BS.cancelScheduling(VL, VL0); } - newTreeEntry(VL, Reuse, UserTreeIdx); + newTreeEntry(VL, Reuse, UserTreeIdx, S); return; } case Instruction::Load: { @@ -1487,7 +1520,7 @@ if (DL->getTypeSizeInBits(ScalarTy) != DL->getTypeAllocSizeInBits(ScalarTy)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n"); return; } @@ -1498,7 +1531,7 @@ LoadInst *L = cast(VL[i]); if (!L->isSimple()) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n"); return; } @@ -1520,7 +1553,7 @@ if (Consecutive) { ++NumLoadsWantToKeepOrder; - newTreeEntry(VL, true, UserTreeIdx); + newTreeEntry(VL, true, UserTreeIdx, S); DEBUG(dbgs() << "SLP: added a vector of loads.\n"); return; } @@ -1535,7 +1568,7 @@ } BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); if (ReverseConsecutive) { ++NumLoadsWantToChangeOrder; @@ -1562,12 +1595,12 @@ Type *Ty = cast(VL[i])->getOperand(0)->getType(); if (Ty != SrcTy || !isValidElementType(Ty)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); DEBUG(dbgs() << "SLP: Gathering casts with different src types.\n"); return; } } - newTreeEntry(VL, true, UserTreeIdx); + newTreeEntry(VL, true, UserTreeIdx, S); DEBUG(dbgs() << "SLP: added a vector of casts.\n"); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { @@ -1590,13 +1623,13 @@ if (Cmp->getPredicate() != P0 || Cmp->getOperand(0)->getType() != ComparedTy) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); DEBUG(dbgs() << "SLP: Gathering cmp with different predicate.\n"); return; } } - newTreeEntry(VL, true, UserTreeIdx); + newTreeEntry(VL, true, UserTreeIdx, S); DEBUG(dbgs() << "SLP: added a vector of compares.\n"); for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { @@ -1628,14 +1661,14 @@ case Instruction::And: case Instruction::Or: case Instruction::Xor: { - newTreeEntry(VL, true, UserTreeIdx); + newTreeEntry(VL, true, UserTreeIdx, S); DEBUG(dbgs() << "SLP: added a vector of bin op.\n"); // Sort operands of the instructions so that each side is more likely to // have the same opcode. if (isa(VL0) && VL0->isCommutative()) { ValueList Left, Right; - reorderInputsAccordingToOpcode(VL0->getOpcode(), VL, Left, Right); + reorderInputsAccordingToOpcode(S.Opcode, VL, Left, Right); buildTree_rec(Left, Depth + 1, UserTreeIdx); buildTree_rec(Right, Depth + 1, UserTreeIdx); return; @@ -1657,7 +1690,7 @@ if (cast(VL[j])->getNumOperands() != 2) { DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } } @@ -1670,7 +1703,7 @@ if (Ty0 != CurTy) { DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } } @@ -1682,12 +1715,12 @@ DEBUG( dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n"); BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); return; } } - newTreeEntry(VL, true, UserTreeIdx); + newTreeEntry(VL, true, UserTreeIdx, S); DEBUG(dbgs() << "SLP: added a vector of GEPs.\n"); for (unsigned i = 0, e = 2; i < e; ++i) { ValueList Operands; @@ -1704,12 +1737,12 @@ for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); DEBUG(dbgs() << "SLP: Non-consecutive store.\n"); return; } - newTreeEntry(VL, true, UserTreeIdx); + newTreeEntry(VL, true, UserTreeIdx, S); DEBUG(dbgs() << "SLP: added a vector of stores.\n"); ValueList Operands; @@ -1727,7 +1760,7 @@ Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI); if (!isTriviallyVectorizable(ID)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); DEBUG(dbgs() << "SLP: Non-vectorizable call.\n"); return; } @@ -1741,7 +1774,7 @@ getVectorIntrinsicIDForCall(CI2, TLI) != ID || !CI->hasIdenticalOperandBundleSchema(*CI2)) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); DEBUG(dbgs() << "SLP: mismatched calls:" << *CI << "!=" << *VL[i] << "\n"); return; @@ -1752,7 +1785,7 @@ Value *A1J = CI2->getArgOperand(1); if (A1I != A1J) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI << " argument "<< A1I<<"!=" << A1J << "\n"); @@ -1765,14 +1798,14 @@ CI->op_begin() + CI->getBundleOperandsEndIndex(), CI2->op_begin() + CI2->getBundleOperandsStartIndex())) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); DEBUG(dbgs() << "SLP: mismatched bundle operands in calls:" << *CI << "!=" << *VL[i] << '\n'); return; } } - newTreeEntry(VL, true, UserTreeIdx); + newTreeEntry(VL, true, UserTreeIdx, S); for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) { ValueList Operands; // Prepare the operand vector. @@ -1787,19 +1820,19 @@ case Instruction::ShuffleVector: { // If this is not an alternate sequence of opcode like add-sub // then do not vectorize this instruction. - if (!isAltShuffle) { + if (!S.IsAltShuffle) { BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n"); return; } - newTreeEntry(VL, true, UserTreeIdx); + newTreeEntry(VL, true, UserTreeIdx, S); DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n"); // Reorder operands if reordering would enable vectorization. if (isa(VL0)) { ValueList Left, Right; - reorderAltShuffleOperands(VL0->getOpcode(), VL, Left, Right); + reorderAltShuffleOperands(S.Opcode, VL, Left, Right); buildTree_rec(Left, Depth + 1, UserTreeIdx); buildTree_rec(Right, Depth + 1, UserTreeIdx); return; @@ -1817,7 +1850,7 @@ } default: BS.cancelScheduling(VL, VL0); - newTreeEntry(VL, false, UserTreeIdx); + newTreeEntry(VL, false, UserTreeIdx, S); DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n"); return; } @@ -1852,7 +1885,7 @@ Instruction *E0 = cast(OpValue); assert(E0->getOpcode() == Instruction::ExtractElement || E0->getOpcode() == Instruction::ExtractValue); - assert(E0->getOpcode() == getSameOpcode(VL) && "Invalid opcode"); + assert(E0->getOpcode() == getSameOpcode(VL).Opcode && "Invalid opcode"); // Check if all of the extracts come from the same vector and from the // correct offset. Value *Vec = E0->getOperand(0); @@ -1916,7 +1949,7 @@ if (isSplat(VL)) { return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); } - if (getSameOpcode(VL) == Instruction::ExtractElement) { + if (getSameOpcode(VL).Opcode == Instruction::ExtractElement) { Optional ShuffleKind = isShuffle(VL); if (ShuffleKind.hasValue()) { int Cost = TTI->getShuffleCost(ShuffleKind.getValue(), VecTy); @@ -1938,16 +1971,17 @@ } return getGatherCost(E->Scalars); } - unsigned Opcode = getSameOpcode(VL); - assert(Opcode && allSameType(VL) && allSameBlock(VL) && "Invalid VL"); - Instruction *VL0 = cast(VL[0]); - switch (Opcode) { + assert(E->State.Opcode && allSameType(VL) && allSameBlock(VL) && "Invalid VL"); + Instruction *VL0 = cast(E->State.OpValue); + unsigned ShuffleOrOp = E->State.IsAltShuffle ? + (unsigned) Instruction::ShuffleVector : E->State.Opcode; + switch (ShuffleOrOp) { case Instruction::PHI: { return 0; } case Instruction::ExtractValue: case Instruction::ExtractElement: { - if (canReuseExtract(VL, VL0)) { + if (canReuseExtract(VL, E->State.OpValue)) { int DeadCost = 0; for (unsigned i = 0, e = VL.size(); i < e; ++i) { Instruction *E = cast(VL[i]); @@ -1991,8 +2025,8 @@ // Calculate the cost of this instruction. VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); int ScalarCost = VecTy->getNumElements() * - TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty(), VL0); - int VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy, VL0); + TTI->getCmpSelInstrCost(ShuffleOrOp, ScalarTy, Builder.getInt1Ty(), VL0); + int VecCost = TTI->getCmpSelInstrCost(ShuffleOrOp, VecTy, MaskTy, VL0); return VecCost - ScalarCost; } case Instruction::Add: @@ -2029,34 +2063,33 @@ // If instead not all operands are constants, then set the operand kind // to OK_AnyValue. If all operands are constants but not the same, // then set the operand kind to OK_NonUniformConstantValue. - ConstantInt *CInt = nullptr; - for (unsigned i = 0; i < VL.size(); ++i) { - const Instruction *I = cast(VL[i]); - if (!isa(I->getOperand(1))) { - Op2VK = TargetTransformInfo::OK_AnyValue; - break; - } - if (i == 0) { - CInt = cast(I->getOperand(1)); - continue; + if (auto *CInt = dyn_cast(VL0->getOperand(1))) { + const unsigned Opcode = E->State.Opcode; + for (auto *V : VL) { + auto *I = cast(V); + if (I == VL0 || Opcode != I->getOpcode()) + continue; + if (!isa(I->getOperand(1))) { + Op2VK = TargetTransformInfo::OK_AnyValue; + break; + } + if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && + CInt != cast(I->getOperand(1))) + Op2VK = TargetTransformInfo::OK_NonUniformConstantValue; } + // FIXME: Currently cost of model modification for division by power of + // 2 is handled for X86 and AArch64. Add support for other targets. if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && - CInt != cast(I->getOperand(1))) - Op2VK = TargetTransformInfo::OK_NonUniformConstantValue; - } - // FIXME: Currently cost of model modification for division by power of - // 2 is handled for X86 and AArch64. Add support for other targets. - if (Op2VK == TargetTransformInfo::OK_UniformConstantValue && CInt && - CInt->getValue().isPowerOf2()) - Op2VP = TargetTransformInfo::OP_PowerOf2; + CInt->getValue().isPowerOf2()) + Op2VP = TargetTransformInfo::OP_PowerOf2; + } else + Op2VK = TargetTransformInfo::OK_AnyValue; - SmallVector Operands(VL0->operand_values()); - int ScalarCost = - VecTy->getNumElements() * - TTI->getArithmeticInstrCost(Opcode, ScalarTy, Op1VK, Op2VK, Op1VP, - Op2VP, Operands); - int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy, Op1VK, Op2VK, - Op1VP, Op2VP, Operands); + int ScalarCost = VecTy->getNumElements() * + TTI->getArithmeticInstrCost(E->State.Opcode, ScalarTy, + Op1VK, Op2VK, Op1VP, Op2VP); + int VecCost = TTI->getArithmeticInstrCost(E->State.Opcode, VecTy, Op1VK, + Op2VK, Op1VP, Op2VP); return VecCost - ScalarCost; } case Instruction::GetElementPtr: { @@ -2122,23 +2155,18 @@ TargetTransformInfo::OK_AnyValue; TargetTransformInfo::OperandValueKind Op2VK = TargetTransformInfo::OK_AnyValue; - int ScalarCost = 0; - int VecCost = 0; - for (Value *i : VL) { - Instruction *I = cast(i); - if (!I) - break; - ScalarCost += - TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK); - } + unsigned AltOpcode = getAltOpcode(E->State.Opcode); + int ScalarCost = + TTI->getArithmeticInstrCost(E->State.Opcode, ScalarTy, Op1VK, Op2VK) * + VL.size() / 2; + ScalarCost += + TTI->getArithmeticInstrCost(AltOpcode, ScalarTy, Op1VK, Op2VK) * + VL.size() / 2; // VecCost is equal to sum of the cost of creating 2 vectors // and the cost of creating shuffle. - Instruction *I0 = cast(VL[0]); - VecCost = - TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK); - Instruction *I1 = cast(VL[1]); - VecCost += - TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK); + int VecCost = + TTI->getArithmeticInstrCost(E->State.Opcode, VecTy, Op1VK, Op2VK); + VecCost += TTI->getArithmeticInstrCost(AltOpcode, VecTy, Op1VK, Op2VK); VecCost += TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0); return VecCost - ScalarCost; @@ -2205,7 +2233,7 @@ Instruction *PrevInst = nullptr; for (const auto &N : VectorizableTree) { - Instruction *Inst = dyn_cast(N.Scalars[0]); + Instruction *Inst = dyn_cast(N.State.OpValue); if (!Inst) continue; @@ -2265,7 +2293,7 @@ for (TreeEntry &TE : VectorizableTree) { int C = getEntryCost(&TE); DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with " - << *TE.Scalars[0] << ".\n"); + << *TE.State.OpValue << ".\n"); Cost += C; } @@ -2286,7 +2314,7 @@ // extend the extracted value back to the original type. Here, we account // for the extract and the added cost of the sign extend if needed. auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth); - auto *ScalarRoot = VectorizableTree[0].Scalars[0]; + auto *ScalarRoot = VectorizableTree[0].State.OpValue; if (MinBWs.count(ScalarRoot)) { auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first); auto Extend = @@ -2657,12 +2685,16 @@ } Value *BoUpSLP::vectorizeTree(ArrayRef VL) { - if (TreeEntry *E = getTreeEntry(VL[0])) - if (E->isSame(VL)) - return vectorizeTree(E); + InstructionsState S = getSameOpcode(VL); + if (S.Opcode) { + if (TreeEntry *E = getTreeEntry(S.OpValue)) { + if (E->isSame(VL)) + return vectorizeTree(E); + } + } - Type *ScalarTy = VL[0]->getType(); - if (StoreInst *SI = dyn_cast(VL[0])) + Type *ScalarTy = S.OpValue->getType(); + if (StoreInst *SI = dyn_cast(S.OpValue)) ScalarTy = SI->getValueOperand()->getType(); VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); @@ -2673,11 +2705,11 @@ IRBuilder<>::InsertPointGuard Guard(Builder); if (E->VectorizedValue) { - DEBUG(dbgs() << "SLP: Diamond merged for " << *E->Scalars[0] << ".\n"); + DEBUG(dbgs() << "SLP: Diamond merged for " << *E->State.OpValue << ".\n"); return E->VectorizedValue; } - Instruction *VL0 = cast(E->Scalars[0]); + Instruction *VL0 = cast(E->State.OpValue); Type *ScalarTy = VL0->getType(); if (StoreInst *SI = dyn_cast(VL0)) ScalarTy = SI->getValueOperand()->getType(); @@ -2690,9 +2722,9 @@ return V; } - unsigned Opcode = getSameOpcode(E->Scalars); - - switch (Opcode) { + unsigned ShuffleOrOp = E->State.IsAltShuffle ? + (unsigned) Instruction::ShuffleVector : E->State.Opcode; + switch (ShuffleOrOp) { case Instruction::PHI: { PHINode *PH = dyn_cast(VL0); Builder.SetInsertPoint(PH->getParent()->getFirstNonPHI()); @@ -2801,7 +2833,7 @@ CmpInst::Predicate P0 = cast(VL0)->getPredicate(); Value *V; - if (Opcode == Instruction::FCmp) + if (E->State.Opcode == Instruction::FCmp) V = Builder.CreateFCmp(P0, L, R); else V = Builder.CreateICmp(P0, L, R); @@ -2853,8 +2885,8 @@ case Instruction::Xor: { ValueList LHSVL, RHSVL; if (isa(VL0) && VL0->isCommutative()) - reorderInputsAccordingToOpcode(VL0->getOpcode(), - E->Scalars, LHSVL, RHSVL); + reorderInputsAccordingToOpcode(E->State.Opcode, E->Scalars, LHSVL, + RHSVL); else for (Value *V : E->Scalars) { auto *I = cast(V); @@ -2870,8 +2902,8 @@ if (Value *V = alreadyVectorized(E->Scalars, VL0)) return V; - BinaryOperator *BinOp = cast(VL0); - Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS); + Value *V = Builder.CreateBinOp( + static_cast(E->State.Opcode), LHS, RHS); E->VectorizedValue = V; propagateIRFlags(E->VectorizedValue, E->Scalars, VL0); ++NumVectorInstructions; @@ -3022,8 +3054,9 @@ } case Instruction::ShuffleVector: { ValueList LHSVL, RHSVL; - assert(isa(VL0) && "Invalid Shuffle Vector Operand"); - reorderAltShuffleOperands(VL0->getOpcode(), E->Scalars, LHSVL, RHSVL); + assert(Instruction::isBinaryOp(E->State.Opcode) && + "Invalid Shuffle Vector Operand"); + reorderAltShuffleOperands(E->State.Opcode, E->Scalars, LHSVL, RHSVL); setInsertPointAfterBundle(E->Scalars, VL0); Value *LHS = vectorizeTree(LHSVL); @@ -3033,13 +3066,13 @@ return V; // Create a vector of LHS op1 RHS - BinaryOperator *BinOp0 = cast(VL0); - Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS); + Value *V0 = Builder.CreateBinOp( + static_cast(E->State.Opcode), LHS, RHS); + unsigned AltOpcode = getAltOpcode(E->State.Opcode); // Create a vector of LHS op2 RHS - Instruction *VL1 = cast(E->Scalars[1]); - BinaryOperator *BinOp1 = cast(VL1); - Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS); + Value *V1 = Builder.CreateBinOp( + static_cast(AltOpcode), LHS, RHS); // Create shuffle to take alternate operations from the vector. // Also, gather up odd and even scalar ops to propagate IR flags to @@ -3094,7 +3127,7 @@ // If the vectorized tree can be rewritten in a smaller type, we truncate the // vectorized root. InstCombine will then rewrite the entire expression. We // sign extend the extracted values below. - auto *ScalarRoot = VectorizableTree[0].Scalars[0]; + auto *ScalarRoot = VectorizableTree[0].State.OpValue; if (MinBWs.count(ScalarRoot)) { if (auto *I = dyn_cast(VectorRoot)) Builder.SetInsertPoint(&*++BasicBlock::iterator(I)); @@ -3203,7 +3236,7 @@ continue; assert(Entry->VectorizedValue && "Can't find vectorizable value"); - + // For each lane: for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) { Value *Scalar = Entry->Scalars[Lane]; @@ -3526,12 +3559,6 @@ for (Instruction *I = FromI; I != ToI; I = I->getNextNode()) { ScheduleData *SD = ScheduleDataMap[I]; if (!SD) { - // Allocate a new ScheduleData for the instruction. - if (ChunkPos >= ChunkSize) { - ScheduleDataChunks.push_back( - llvm::make_unique(ChunkSize)); - ChunkPos = 0; - } SD = allocateScheduleDataChunks(); ScheduleDataMap[I] = SD; SD->Inst = I; @@ -3722,7 +3749,7 @@ I = I->getNextNode()) { BS->doForAllOpcodes(I, [this, &Idx, &NumToSchedule, BS](ScheduleData *SD) { assert(SD->isPartOfBundle() == - (getTreeEntry(SD->Inst) != nullptr) && + (getTreeEntry(SD->OpValue) != nullptr) && "scheduler and vectorizer bundle mismatch"); SD->FirstInBundle->SchedulingPriority = Idx++; if (SD->isSchedulingEntity()) {