diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2871,7 +2871,7 @@
         return;
       }
 
-      LLVM_DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
+      LLVM_DEBUG(dbgs() << "SLP: Gathering non-vectorized loads.\n");
       BS.cancelScheduling(VL, VL0);
       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
                    ReuseShuffleIndicies);
@@ -3132,12 +3132,20 @@
           }
           return;
         }
+        // Vectorizing non-consecutive stores with `llvm.masked.scatter`.
+        TreeEntry *TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
+                                     UserTreeIdx, ReuseShuffleIndicies);
+        TE->setOperandsInOrder();
+        buildTree_rec(PointerOps, Depth + 1, {TE, 0});
+        LLVM_DEBUG(
+            dbgs() << "SLP: added a vector of non-consecutive stores.\n");
+        return;
       }
 
       BS.cancelScheduling(VL, VL0);
       newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
                    ReuseShuffleIndicies);
-      LLVM_DEBUG(dbgs() << "SLP: Non-consecutive store.\n");
+      LLVM_DEBUG(dbgs() << "SLP: Gathering non-vectorized store.\n");
      return;
    }
    case Instruction::Call: {
@@ -3741,8 +3749,17 @@
       if (NeedToShuffleReuses)
         ReuseShuffleCost = -(ReuseShuffleNumbers - VL.size()) * ScalarEltCost;
       int ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
-      int VecStCost = TTI->getMemoryOpCost(Instruction::Store,
-                                           VecTy, Alignment, 0, CostKind, VL0);
+      int VecStCost;
+      if (E->State == TreeEntry::Vectorize) {
+        VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, Alignment,
+                                         0, CostKind, VL0);
+      } else {
+        assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
+        VecStCost = TTI->getGatherScatterOpCost(
+            Instruction::Store, VecTy,
+            cast<StoreInst>(VL0)->getPointerOperand(),
+            /*VariableMask=*/false, Alignment, CostKind, VL0);
+      }
       if (IsReorder) {
         // TODO: Merge this shuffle with the ReuseShuffleCost.
         VecStCost += TTI->getShuffleCost(
@@ -4591,25 +4608,37 @@
 
       setInsertPointAfterBundle(E);
 
+      Instruction *NewSI;
       Value *VecValue = vectorizeTree(E->getOperand(0));
       if (IsReorder) {
         SmallVector<int, 4> Mask(E->ReorderIndices.begin(),
                                  E->ReorderIndices.end());
         VecValue = Builder.CreateShuffleVector(VecValue, Mask, "reorder_shuf");
       }
-      Value *ScalarPtr = SI->getPointerOperand();
-      Value *VecPtr = Builder.CreateBitCast(
-          ScalarPtr, VecValue->getType()->getPointerTo(AS));
-      StoreInst *ST = Builder.CreateAlignedStore(VecValue, VecPtr,
-                                                 SI->getAlign());
-
-      // The pointer operand uses an in-tree scalar, so add the new BitCast to
-      // ExternalUses to make sure that an extract will be generated in the
-      // future.
-      if (getTreeEntry(ScalarPtr))
-        ExternalUses.push_back(ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
+      if (E->State == TreeEntry::Vectorize) {
+        Value *ScalarPtr = SI->getPointerOperand();
+        Value *VecPtr = Builder.CreateBitCast(
+            ScalarPtr, VecValue->getType()->getPointerTo(AS));
+        NewSI = Builder.CreateAlignedStore(VecValue, VecPtr, SI->getAlign());
+
+        // The pointer operand uses an in-tree scalar, so add the new BitCast to
+        // ExternalUses to make sure that an extract will be generated in the
+        // future.
+        if (getTreeEntry(ScalarPtr))
+          ExternalUses.push_back(
+              ExternalUser(ScalarPtr, cast<User>(VecPtr), 0));
+      } else {
+        assert(E->State == TreeEntry::ScatterVectorize && "Unhandled state");
+        Value *VecPtr = vectorizeTree(E->getOperand(1));
+        // Use the minimum alignment of the scattered stores.
+        Align CommonAlignment = SI->getAlign();
+        for (Value *V : E->Scalars)
+          CommonAlignment =
+              commonAlignment(CommonAlignment, cast<StoreInst>(V)->getAlign());
+        NewSI = Builder.CreateMaskedScatter(VecValue, VecPtr, CommonAlignment);
+      }
 
-      Value *V = propagateMetadata(ST, E->Scalars);
+      Value *V = propagateMetadata(NewSI, E->Scalars);
       if (NeedToShuffleReuses)
         V = Builder.CreateShuffleVector(V, E->ReuseShuffleIndices, "shuffle");
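
Illustration (not part of the patch): the new TreeEntry::ScatterVectorize path targets bundles of stores whose addresses are not consecutive, which previously hit the "Non-consecutive store" bail-out. The sketch below is a hypothetical C input, assuming the strided offsets defeat the consecutive-store check and that TTI->getGatherScatterOpCost() reports the scatter as cheaper than the scalar stores; with those assumptions the bundle may now be emitted as a single llvm.masked.scatter (CreateMaskedScatter is called without an explicit mask, so an all-ones mask is used).

  // Four stores of SLP-vectorizable values to non-consecutive slots of 'a'.
  // Before this patch the store bundle is gathered; with it, the bundle may
  // be vectorized as one llvm.masked.scatter, subject to the cost model.
  void store_strided(float *a, float x0, float x1, float x2, float x3) {
    a[0]  = x0 + 1.0f;
    a[4]  = x1 + 1.0f;
    a[8]  = x2 + 1.0f;
    a[12] = x3 + 1.0f;
  }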