diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -473,24 +473,26 @@
 }
 
 static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
+                                       const TargetLibraryInfo &TLI,
                                        unsigned BaseIndex = 0);
 
 /// Checks if the provided operands of 2 cmp instructions are compatible, i.e.
 /// compatible instructions or constants, or just some other regular values.
 static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
-                                Value *Op1) {
+                                Value *Op1, const TargetLibraryInfo &TLI) {
   return (isConstant(BaseOp0) && isConstant(Op0)) ||
          (isConstant(BaseOp1) && isConstant(Op1)) ||
          (!isa<UndefValue>(BaseOp0) && !isa<UndefValue>(Op0) &&
           !isa<UndefValue>(BaseOp1) && !isa<UndefValue>(Op1)) ||
-         getSameOpcode({BaseOp0, Op0}).getOpcode() ||
-         getSameOpcode({BaseOp1, Op1}).getOpcode();
+         getSameOpcode({BaseOp0, Op0}, TLI).getOpcode() ||
+         getSameOpcode({BaseOp1, Op1}, TLI).getOpcode();
 }
 
 /// \returns analysis of the Instructions in \p VL described in
 /// InstructionsState, the Opcode that we suppose the whole list
 /// could be vectorized even if its structure is diverse.
 static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
+                                       const TargetLibraryInfo &TLI,
                                        unsigned BaseIndex) {
   // Make sure these are all Instructions.
   if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
@@ -508,9 +510,19 @@
 
   // Check for one alternate opcode from another BinaryOperator.
   // TODO - generalize to support all operators (types, calls etc.).
+  auto *IBase = cast<Instruction>(VL[BaseIndex]);
+  Intrinsic::ID BaseID = 0;
+  SmallVector<VFInfo> BaseMappings;
+  if (auto *CallBase = dyn_cast<CallInst>(IBase)) {
+    BaseID = getVectorIntrinsicIDForCall(CallBase, &TLI);
+    BaseMappings = VFDatabase(*CallBase).getMappings(*CallBase);
+    if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty())
+      return InstructionsState(VL[BaseIndex], nullptr, nullptr);
+  }
   for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
-    unsigned InstOpcode = cast<Instruction>(VL[Cnt])->getOpcode();
-    if (IsBinOp && isa<BinaryOperator>(VL[Cnt])) {
+    auto *I = cast<Instruction>(VL[Cnt]);
+    unsigned InstOpcode = I->getOpcode();
+    if (IsBinOp && isa<BinaryOperator>(I)) {
       if (InstOpcode == Opcode || InstOpcode == AltOpcode)
         continue;
       if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
@@ -519,9 +531,11 @@
         AltIndex = Cnt;
         continue;
       }
-    } else if (IsCastOp && isa<CastInst>(VL[Cnt])) {
-      Type *Ty0 = cast<Instruction>(VL[BaseIndex])->getOperand(0)->getType();
-      Type *Ty1 = cast<Instruction>(VL[Cnt])->getOperand(0)->getType();
+    } else if (IsCastOp && isa<CastInst>(I)) {
+      Value *Op0 = IBase->getOperand(0);
+      Type *Ty0 = Op0->getType();
+      Value *Op1 = I->getOperand(0);
+      Type *Ty1 = Op1->getType();
       if (Ty0 == Ty1) {
         if (InstOpcode == Opcode || InstOpcode == AltOpcode)
           continue;
@@ -552,10 +566,10 @@
       // compatible - need to perform alternate vectorization.
       if (InstOpcode == Opcode) {
         if (BasePred == CurrentPred &&
-            areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1))
+            areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1, TLI))
           continue;
         if (BasePred == SwappedCurrentPred &&
-            areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0))
+            areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0, TLI))
           continue;
         if (E == 2 &&
             (BasePred == CurrentPred || BasePred == SwappedCurrentPred))
           continue;
@@ -566,10 +580,10 @@
         Value *AltOp1 = AltInst->getOperand(1);
         // Check if operands are compatible with alternate operands.
         if (AltPred == CurrentPred &&
-            areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1))
+            areCompatibleCmpOps(AltOp0, AltOp1, Op0, Op1, TLI))
           continue;
         if (AltPred == SwappedCurrentPred &&
-            areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0))
+            areCompatibleCmpOps(AltOp0, AltOp1, Op1, Op0, TLI))
           continue;
       }
       if (BaseIndex == AltIndex && BasePred != CurrentPred) {
@@ -585,8 +599,45 @@
             AltPred == CurrentPred || AltPred == SwappedCurrentPred)
           continue;
       }
-    } else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
+    } else if (InstOpcode == Opcode || InstOpcode == AltOpcode) {
+      if (auto *Gep = dyn_cast<GetElementPtrInst>(I)) {
+        if (Gep->getNumOperands() != 2 ||
+            Gep->getOperand(0)->getType() != IBase->getOperand(0)->getType())
+          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
+      } else if (auto *EI = dyn_cast<ExtractElementInst>(I)) {
+        if (!isVectorLikeInstWithConstOps(EI))
+          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
+      } else if (auto *LI = dyn_cast<LoadInst>(I)) {
+        auto *BaseLI = cast<LoadInst>(IBase);
+        if (!LI->isSimple() || !BaseLI->isSimple())
+          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
+      } else if (auto *Call = dyn_cast<CallInst>(I)) {
+        auto *CallBase = cast<CallInst>(IBase);
+        if (Call->getCalledFunction() != CallBase->getCalledFunction())
+          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
+        if (Call->hasOperandBundles() &&
+            !std::equal(Call->op_begin() + Call->getBundleOperandsStartIndex(),
+                        Call->op_begin() + Call->getBundleOperandsEndIndex(),
+                        CallBase->op_begin() +
+                            CallBase->getBundleOperandsStartIndex()))
+          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
+        Intrinsic::ID ID = getVectorIntrinsicIDForCall(Call, &TLI);
+        if (ID != BaseID)
+          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
+        if (!ID) {
+          SmallVector<VFInfo> Mappings = VFDatabase(*Call).getMappings(*Call);
+          if (Mappings.size() != BaseMappings.size() ||
+              Mappings.front().ISA != BaseMappings.front().ISA ||
+              Mappings.front().ScalarName != BaseMappings.front().ScalarName ||
+              Mappings.front().VectorName !=
BaseMappings.front().VectorName ||
+              Mappings.front().Shape.VF != BaseMappings.front().Shape.VF ||
+              Mappings.front().Shape.Parameters !=
+                  BaseMappings.front().Shape.Parameters)
+            return InstructionsState(VL[BaseIndex], nullptr, nullptr);
+        }
+      }
       continue;
+    }
     return InstructionsState(VL[BaseIndex], nullptr, nullptr);
   }
@@ -1052,6 +1103,7 @@
 
 /// A helper class used for scoring candidates for two consecutive lanes.
 class LookAheadHeuristics {
+  const TargetLibraryInfo &TLI;
   const DataLayout &DL;
   ScalarEvolution &SE;
   const BoUpSLP &R;
@@ -1059,9 +1111,11 @@
   int MaxLevel;  // The maximum recursion depth for accumulating score.
 
 public:
-  LookAheadHeuristics(const DataLayout &DL, ScalarEvolution &SE,
-                      const BoUpSLP &R, int NumLanes, int MaxLevel)
-      : DL(DL), SE(SE), R(R), NumLanes(NumLanes), MaxLevel(MaxLevel) {}
+  LookAheadHeuristics(const TargetLibraryInfo &TLI, const DataLayout &DL,
+                      ScalarEvolution &SE, const BoUpSLP &R, int NumLanes,
+                      int MaxLevel)
+      : TLI(TLI), DL(DL), SE(SE), R(R), NumLanes(NumLanes),
+        MaxLevel(MaxLevel) {}
 
   // The hard-coded scores listed here are not very important, though it shall
   // be higher for better matches to improve the resulting cost. When
@@ -1137,7 +1191,8 @@
     auto *LI1 = dyn_cast<LoadInst>(V1);
     auto *LI2 = dyn_cast<LoadInst>(V2);
     if (LI1 && LI2) {
-      if (LI1->getParent() != LI2->getParent())
+      if (LI1->getParent() != LI2->getParent() || !LI1->isSimple() ||
+          !LI2->isSimple())
         return LookAheadHeuristics::ScoreFail;
 
       Optional<int> Dist = getPointersDiff(
@@ -1205,7 +1260,7 @@
       SmallVector<Value *, 4> Ops(MainAltOps.begin(), MainAltOps.end());
       Ops.push_back(I1);
       Ops.push_back(I2);
-      InstructionsState S = getSameOpcode(Ops);
+      InstructionsState S = getSameOpcode(Ops, TLI);
       // Note: Only consider instructions with <= 2 operands to avoid
       // complexity explosion.
       if (S.getOpcode() &&
@@ -1381,6 +1436,7 @@
   /// A vector of operand vectors.
   SmallVector<OperandDataVec, 4> OpsVec;
 
+  const TargetLibraryInfo &TLI;
   const DataLayout &DL;
   ScalarEvolution &SE;
   const BoUpSLP &R;
@@ -1482,7 +1538,7 @@
   int getLookAheadScore(Value *LHS, Value *RHS, ArrayRef<Value *> MainAltOps,
                         int Lane, unsigned OpIdx, unsigned Idx,
                         bool &IsUsed) {
-    LookAheadHeuristics LookAhead(DL, SE, R, getNumLanes(),
+    LookAheadHeuristics LookAhead(TLI, DL, SE, R, getNumLanes(),
                                   LookAheadMaxDepth);
     // Keep track of the instruction stack as we recurse into the operands
     // during the look-ahead score exploration.
@@ -1704,7 +1760,7 @@
         // Use Boyer-Moore majority voting for finding the majority opcode and
         // the number of times it occurs.
         if (auto *I = dyn_cast<Instruction>(OpData.V)) {
-          if (!OpcodeI || !getSameOpcode({OpcodeI, I}).getOpcode() ||
+          if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).getOpcode() ||
               I->getParent() != Parent) {
             if (NumOpsWithSameOpcodeParent == 0) {
               NumOpsWithSameOpcodeParent = 1;
@@ -1806,9 +1862,9 @@
 
 public:
   /// Initialize with all the operands of the instruction vector \p RootVL.
-  VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
-             ScalarEvolution &SE, const BoUpSLP &R)
-      : DL(DL), SE(SE), R(R) {
+  VLOperands(ArrayRef<Value *> RootVL, const TargetLibraryInfo &TLI,
+             const DataLayout &DL, ScalarEvolution &SE, const BoUpSLP &R)
+      : TLI(TLI), DL(DL), SE(SE), R(R) {
     // Append all the operands of RootVL.
     appendOperandsOfVL(RootVL);
   }
 
@@ -1949,7 +2005,7 @@
           if (MainAltOps[OpIdx].size() != 2) {
             OperandData &AltOp = getData(OpIdx, Lane);
             InstructionsState OpS =
-                getSameOpcode({MainAltOps[OpIdx].front(), AltOp.V});
+                getSameOpcode({MainAltOps[OpIdx].front(), AltOp.V}, TLI);
             if (OpS.getOpcode() && OpS.isAltShuffle())
               MainAltOps[OpIdx].push_back(AltOp.V);
           }
@@ -2024,7 +2080,7 @@
   Optional<int>
   findBestRootPair(ArrayRef<std::pair<Value *, Value *>> Candidates,
                    int Limit = LookAheadHeuristics::ScoreFail) {
-    LookAheadHeuristics LookAhead(*DL, *SE, *this, /*NumLanes=*/2,
+    LookAheadHeuristics LookAhead(*TLI, *DL, *SE, *this, /*NumLanes=*/2,
                                   RootLookAheadMaxDepth);
     int BestScore = Limit;
     Optional<int> Index = None;
@@ -2187,6 +2243,8 @@
   static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
                                              SmallVectorImpl<Value *> &Left,
                                              SmallVectorImpl<Value *> &Right,
+                                             const TargetLibraryInfo &TLI,
+                                             const TargetTransformInfo &TTI,
                                              const DataLayout &DL,
                                              ScalarEvolution &SE,
                                              const BoUpSLP &R);
@@ -2547,7 +2605,7 @@
           return UndefValue::get(VL.front()->getType());
         return VL[Idx];
       });
-      InstructionsState S = getSameOpcode(Last->Scalars);
+      InstructionsState S = getSameOpcode(Last->Scalars, *TLI);
       Last->setOperations(S);
       Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
     }
@@ -4521,7 +4579,7 @@
     return true;
   };
 
-  InstructionsState S = getSameOpcode(VL);
+  InstructionsState S = getSameOpcode(VL, *TLI);
   if (Depth == RecursionMaxDepth) {
     LLVM_DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
     if (TryToFindDuplicates(S))
@@ -4988,7 +5046,8 @@
       // Commutative predicate - collect + sort operands of the instructions
       // so that each side is more likely to have the same opcode.
       assert(P0 == SwapP0 && "Commutative Predicate mismatch");
-      reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+      reorderInputsAccordingToOpcode(VL, Left, Right, *TLI, *TTI, *DL, *SE,
+                                     *this);
     } else {
       // Collect operands - commute if it uses the swapped predicate.
       for (Value *V : VL) {
@@ -5035,7 +5094,8 @@
     // have the same opcode.
     if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
       ValueList Left, Right;
-      reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+      reorderInputsAccordingToOpcode(VL, Left, Right, *TLI, *TTI, *DL, *SE,
+                                     *this);
       TE->setOperand(0, Left);
       TE->setOperand(1, Right);
       buildTree_rec(Left, Depth + 1, {TE, 0});
@@ -5321,7 +5381,8 @@
       if (!CI || all_of(VL, [](Value *V) {
             return cast<CmpInst>(V)->isCommutative();
           })) {
-        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
+        reorderInputsAccordingToOpcode(VL, Left, Right, *TLI, *TTI, *DL, *SE,
+                                       *this);
       } else {
         CmpInst::Predicate P0 = CI->getPredicate();
         CmpInst::Predicate AltP0 = cast<CmpInst>(S.AltOp)->getPredicate();
@@ -5340,9 +5401,9 @@
           if (P0 == AltP0Swapped) {
             if (CI != Cmp && S.AltOp != Cmp &&
                 ((P0 == CurrentPred &&
-                  !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS)) ||
+                  !areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS, *TLI)) ||
                  (AltP0 == CurrentPred &&
-                  areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS))))
+                  areCompatibleCmpOps(BaseOp0, BaseOp1, LHS, RHS, *TLI))))
               std::swap(LHS, RHS);
           } else if (P0 != CurrentPred && AltP0 != CurrentPred) {
             std::swap(LHS, RHS);
@@ -5637,7 +5698,8 @@
 /// given \p MainOp and \p AltOp instructions.
 static bool isAlternateInstruction(const Instruction *I,
                                    const Instruction *MainOp,
-                                   const Instruction *AltOp) {
+                                   const Instruction *AltOp,
+                                   const TargetLibraryInfo &TLI) {
   if (auto *CI0 = dyn_cast<CmpInst>(MainOp)) {
     auto *AltCI0 = cast<CmpInst>(AltOp);
     auto *CI = cast<CmpInst>(I);
@@ -5650,7 +5712,7 @@
       return I == AltCI0 ||
              (I != MainOp &&
               !areCompatibleCmpOps(CI0->getOperand(0), CI0->getOperand(1),
-                                   CI->getOperand(0), CI->getOperand(1)));
+                                   CI->getOperand(0), CI->getOperand(1), TLI));
     return AltP0 == CurrentPred || AltP0Swapped == CurrentPred;
   }
   return I->getOpcode() == AltOp->getOpcode();
@@ -7249,15 +7311,14 @@
 
 // Perform operand reordering on the instructions in VL and return the reordered
 // operands in Left and Right.
-void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
-                                             SmallVectorImpl<Value *> &Left,
-                                             SmallVectorImpl<Value *> &Right,
-                                             const DataLayout &DL,
-                                             ScalarEvolution &SE,
-                                             const BoUpSLP &R) {
+void BoUpSLP::reorderInputsAccordingToOpcode(
+    ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left,
+    SmallVectorImpl<Value *> &Right, const TargetLibraryInfo &TLI,
+    const TargetTransformInfo &TTI, const DataLayout &DL, ScalarEvolution &SE,
+    const BoUpSLP &R) {
   if (VL.empty())
     return;
-  VLOperands Ops(VL, DL, SE, R);
+  VLOperands Ops(VL, TLI, DL, SE, R);
   // Reorder the operands in place.
   Ops.reorder();
   Left = Ops.getVL(0);
   Right = Ops.getVL(1);
@@ -7517,7 +7578,7 @@
 
 Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
   const unsigned VF = VL.size();
-  InstructionsState S = getSameOpcode(VL);
+  InstructionsState S = getSameOpcode(VL, *TLI);
   if (S.getOpcode()) {
     if (TreeEntry *E = getTreeEntry(S.OpValue))
       if (E->isSame(VL)) {
@@ -7644,11 +7705,47 @@
   bool NeedToShuffleReuses = !E->ReuseShuffleIndices.empty();
   unsigned VF = E->getVectorFactor();
 
+  auto &&SetInsertPointAfterOps = [this](ArrayRef<Value *> VL) {
+    // The last instruction in the bundle in program order.
+    Instruction *LastInst = nullptr;
+
+    for (Value *V : VL) {
+      // If the value was vectorized, need to get the vector value for correct
+      // insert point.
+      if (const TreeEntry *TE = getTreeEntry(V))
+        if (TE->VectorizedValue)
+          V = TE->VectorizedValue;
+      auto *I = dyn_cast<Instruction>(V);
+      if (!I)
+        continue;
+      if (!DT->isReachableFromEntry(I->getParent()))
+        continue;
+      if (!LastInst) {
+        LastInst = I;
+        continue;
+      }
+      if ((LastInst->getParent() != I->getParent() &&
+           DT->dominates(LastInst->getParent(), I->getParent())) ||
+          (LastInst->getParent() == I->getParent() && LastInst->comesBefore(I)))
+        LastInst = I;
+    }
+    // Set the insertion point after the last instruction in the bundle. Set
+    // the debug location to Front.
+    if (!LastInst)
+      return;
+    if (isa<PHINode>(LastInst))
+      Builder.SetInsertPoint(LastInst->getParent(),
+                             LastInst->getParent()->getFirstInsertionPt());
+    else
+      Builder.SetInsertPoint(LastInst->getParent(),
+                             std::next(LastInst->getIterator()));
+    Builder.SetCurrentDebugLocation(LastInst->getDebugLoc());
+  };
+
   ShuffleInstructionBuilder ShuffleBuilder(Builder, VF, GatherShuffleSeq,
                                            CSEBlocks);
   if (E->State == TreeEntry::NeedToGather) {
-    if (E->getMainOp())
-      setInsertPointAfterBundle(E);
+    SetInsertPointAfterOps(E->Scalars);
     Value *Vec;
     SmallVector<int> Mask;
     SmallVector<const TreeEntry *> Entries;
@@ -8193,9 +8290,10 @@
     SmallVector<int> Mask;
     buildShuffleEntryMask(
         E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
-        [E](Instruction *I) {
+        [E, this](Instruction *I) {
           assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
-          return isAlternateInstruction(I, E->getMainOp(), E->getAltOp());
+          return isAlternateInstruction(I, E->getMainOp(), E->getAltOp(),
+                                        *TLI);
         },
         Mask, &OpScalars, &AltScalars);
@@ -10086,7 +10184,7 @@
 
   // Check that all of the parts are instructions of the same type,
   // we permit an alternate opcode via InstructionsState.
-  InstructionsState S = getSameOpcode(VL);
+  InstructionsState S = getSameOpcode(VL, *TLI);
   if (!S.getOpcode())
     return false;
 
@@ -10783,7 +10881,8 @@
   }
 
   /// Attempt to vectorize the tree found by matchAssociativeReduction.
-  Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
+  Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI,
+                     const TargetLibraryInfo &TLI) {
     constexpr int ReductionLimit = 4;
     constexpr unsigned RegMaxNumber = 4;
     constexpr unsigned RedValsMaxNumber = 128;
@@ -10845,7 +10944,7 @@
     // Try to vectorize elements based on their type.
     for (unsigned I = 0, E = ReducedVals.size(); I < E; ++I) {
       ArrayRef<Value *> OrigReducedVals = ReducedVals[I];
-      InstructionsState S = getSameOpcode(OrigReducedVals);
+      InstructionsState S = getSameOpcode(OrigReducedVals, TLI);
       SmallVector<Value *> Candidates;
       DenseMap<Value *, Value *> TrackedToOrig;
       for (unsigned Cnt = 0, Sz = OrigReducedVals.size(); Cnt < Sz; ++Cnt) {
@@ -10864,7 +10963,7 @@
       // Try to handle shuffled extractelements.
       if (S.getOpcode() == Instruction::ExtractElement && !S.isAltShuffle() &&
           I + 1 < E) {
-        InstructionsState NextS = getSameOpcode(ReducedVals[I + 1]);
+        InstructionsState NextS = getSameOpcode(ReducedVals[I + 1], TLI);
         if (NextS.getOpcode() == Instruction::ExtractElement &&
             !NextS.isAltShuffle()) {
           SmallVector<Value *> CommonCandidates(Candidates);
@@ -11504,7 +11603,7 @@
     if (IsBinop || IsSelect) {
       HorizontalReduction HorRdx;
       if (HorRdx.matchAssociativeReduction(P, Inst, SE, DL, TLI))
-        return HorRdx.tryToReduce(R, TTI);
+        return HorRdx.tryToReduce(R, TTI, TLI);
     }
     return nullptr;
   };
@@ -11702,7 +11801,8 @@
 /// predicate of the second or the operands IDs are less than the operands IDs
 /// of the second cmp instruction.
 template <bool IsCompatibility>
-static bool compareCmp(Value *V, Value *V2,
+static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
+                       const TargetTransformInfo &TTI,
                        function_ref<bool(Instruction *)> IsDeleted) {
   auto *CI1 = cast<CmpInst>(V);
   auto *CI2 = cast<CmpInst>(V2);
@@ -11738,7 +11838,7 @@
       if (auto *I2 = dyn_cast<Instruction>(Op2)) {
         if (I1->getParent() != I2->getParent())
           return false;
-        InstructionsState S = getSameOpcode({I1, I2});
+        InstructionsState S = getSameOpcode({I1, I2}, TLI);
         if (S.getOpcode())
           continue;
         return false;
@@ -11782,15 +11882,16 @@
   }
   // Try to vectorize list of compares.
   // Sort by type, compare predicate, etc.
-  auto &&CompareSorter = [&R](Value *V, Value *V2) {
-    return compareCmp<false>(V, V2,
+  auto &&CompareSorter = [&R, TLI = TLI, TTI = TTI](Value *V, Value *V2) {
+    return compareCmp<false>(V, V2, *TLI, *TTI,
                              [&R](Instruction *I) { return R.isDeleted(I); });
   };
-  auto &&AreCompatibleCompares = [&R](Value *V1, Value *V2) {
+  auto &&AreCompatibleCompares = [&R, TLI = TLI, TTI = TTI](Value *V1,
+                                                            Value *V2) {
     if (V1 == V2)
       return true;
-    return compareCmp<true>(V1, V2,
+    return compareCmp<true>(V1, V2, *TLI, *TTI,
                             [&R](Instruction *I) { return R.isDeleted(I); });
   };
   auto Limit = [&R](Value *V) {
@@ -11871,7 +11972,7 @@
                "Different nodes should have different DFS numbers");
         if (NodeI1 != NodeI2)
           return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
-        InstructionsState S = getSameOpcode({I1, I2});
+        InstructionsState S = getSameOpcode({I1, I2}, *TLI);
         if (S.getOpcode())
           continue;
         return I1->getOpcode() < I2->getOpcode();
@@ -11888,7 +11989,7 @@
       }
       return ConstOrder && *ConstOrder;
     };
-    auto AreCompatiblePHIs = [&PHIToOpcodes](Value *V1, Value *V2) {
+    auto AreCompatiblePHIs = [&PHIToOpcodes, this](Value *V1, Value *V2) {
       if (V1 == V2)
         return true;
       if (V1->getType() != V2->getType())
        return false;
@@ -11905,7 +12006,7 @@
          if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
            if (I1->getParent() != I2->getParent())
              return false;
-            InstructionsState S = getSameOpcode({I1, I2});
+            InstructionsState S = getSameOpcode({I1, I2}, *TLI);
            if (S.getOpcode())
              continue;
            return false;
@@ -12188,7 +12289,7 @@
               "Different nodes should have different DFS numbers");
        if (NodeI1 != NodeI2)
          return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
-        InstructionsState S = getSameOpcode({I1, I2});
+        InstructionsState S = getSameOpcode({I1, I2}, *TLI);
        if (S.getOpcode())
          return false;
        return I1->getOpcode() < I2->getOpcode();
@@ -12200,7 +12301,7 @@
            V2->getValueOperand()->getValueID();
  };
 
-  auto &&AreCompatibleStores = [](StoreInst *V1, StoreInst *V2) {
+  auto &&AreCompatibleStores = [this](StoreInst *V1, StoreInst *V2) {
    if (V1 == V2)
      return true;
    if
 (V1->getPointerOperandType() != V2->getPointerOperandType())
       return false;
@@ -12213,7 +12314,7 @@
       if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
         if (I1->getParent() != I2->getParent())
           return false;
-        InstructionsState S = getSameOpcode({I1, I2});
+        InstructionsState S = getSameOpcode({I1, I2}, *TLI);
         return S.getOpcode() > 0;
       }
     if (isa<Constant>(V1->getValueOperand()) &&
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll b/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
--- a/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
@@ -62,11 +62,11 @@
 ; Don't vectorize volatile loads.
 define void @test_volatile_load(double* %a, double* %b, double* %c) {
 ; CHECK-LABEL: @test_volatile_load(
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1
-; CHECK-NEXT:    [[I0:%.*]] = load volatile double, double* [[A]], align 8
-; CHECK-NEXT:    [[I1:%.*]] = load volatile double, double* [[B]], align 8
+; CHECK-NEXT:    [[I0:%.*]] = load volatile double, double* [[A:%.*]], align 8
+; CHECK-NEXT:    [[I1:%.*]] = load volatile double, double* [[B:%.*]], align 8
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
 ; CHECK-NEXT:    [[I3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
 ; CHECK-NEXT:    [[I4:%.*]] = load double, double* [[ARRAYIDX4]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[I0]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[I3]], i32 1