diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3022,19 +3022,24 @@
       return;
     }
     case Instruction::Call: {
-      // Check if the calls are all to the same vectorizable intrinsic.
+      // Check if the calls are all to the same vectorizable intrinsic or
+      // library function.
       CallInst *CI = cast<CallInst>(VL0);
-      // Check if this is an Intrinsic call or something that can be
-      // represented by an intrinsic call
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
-      if (!isTriviallyVectorizable(ID)) {
+
+      auto Shape =
+          VFShape::get(*CI, {static_cast<unsigned int>(VL.size()), false},
+                       false /*HasGlobalPred*/);
+      Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
+
+      if (!VecFunc && !isTriviallyVectorizable(ID)) {
         BS.cancelScheduling(VL, VL0);
         newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
                      ReuseShuffleIndicies);
         LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
         return;
       }
-      Function *Int = CI->getCalledFunction();
+      Function *F = CI->getCalledFunction();
       unsigned NumArgs = CI->getNumArgOperands();
       SmallVector<Value *, 4> ScalarArgs(NumArgs, nullptr);
       for (unsigned j = 0; j != NumArgs; ++j)
@@ -3042,8 +3047,10 @@
         ScalarArgs[j] = CI->getArgOperand(j);
       for (Value *V : VL) {
         CallInst *CI2 = dyn_cast<CallInst>(V);
-        if (!CI2 || CI2->getCalledFunction() != Int ||
+        if (!CI2 || CI2->getCalledFunction() != F ||
             getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
+            (VecFunc &&
+             VecFunc != VFDatabase(*CI2).getVectorizedFunction(Shape)) ||
             !CI->hasIdenticalOperandBundleSchema(*CI2)) {
           BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
@@ -4507,7 +4514,8 @@
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
 
       auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
-      bool UseIntrinsic = VecCallCosts.first <= VecCallCosts.second;
+      bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
+                          VecCallCosts.first <= VecCallCosts.second;
 
       Value *ScalarArg = nullptr;
       std::vector<Value *> OpVecs;
@@ -4527,15 +4535,16 @@
         OpVecs.push_back(OpVec);
       }
 
-      Module *M = F->getParent();
-      Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())};
-      Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
-
+      Function *CF;
       if (!UseIntrinsic) {
         VFShape Shape = VFShape::get(
             *CI, {static_cast<unsigned int>(VecTy->getNumElements()), false},
             false /*HasGlobalPred*/);
         CF = VFDatabase(*CI).getVectorizedFunction(Shape);
+      } else {
+        Module *M = F->getParent();
+        Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())};
+        CF = Intrinsic::getDeclaration(M, ID, Tys);
       }
 
       SmallVector<OperandBundleDef, 1> OpBundles;
@@ -5086,6 +5095,12 @@
   return II && II->getIntrinsicID() == Intrinsic::sideeffect;
 }
 
+static bool isVectorizableLibFunctionCall(Instruction *I) {
+  auto *CI = dyn_cast<CallInst>(I);
+
+  return CI && !VFDatabase::getMappings(*CI).empty();
+}
+
 void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
                                                Instruction *ToI,
                                                ScheduleData *PrevLoadStore,
@@ -5102,7 +5117,8 @@
              "new ScheduleData already in scheduling region");
     SD->init(SchedulingRegionID, I);
 
-    if (I->mayReadOrWriteMemory() && !isSideeffectIntrinsic(I)) {
+    if (I->mayReadOrWriteMemory() && !isSideeffectIntrinsic(I) &&
+        !isVectorizableLibFunctionCall(I)) {
       // Update the linked list of memory accessing instructions.
      if (CurrentLoadStore) {
        CurrentLoadStore->NextLoadStore = SD;