Index: llvm/include/llvm/Analysis/VectorUtils.h
===================================================================
--- llvm/include/llvm/Analysis/VectorUtils.h
+++ llvm/include/llvm/Analysis/VectorUtils.h
@@ -462,8 +462,11 @@
 /// metadata value that covers all of the individual values), and set I's
 /// metadata for M equal to the intersection value.
 ///
+/// When RemoveNoAlias is true, MD_noalias will always get a null value.
+///
 /// This function always sets a (possibly null) value for each K in Kinds.
-Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
+Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL,
+                               bool RemoveNoAlias = true);
 
 /// Create a mask that filters the members of an interleave group where there
 /// are gaps.
Index: llvm/lib/Analysis/VectorUtils.cpp
===================================================================
--- llvm/lib/Analysis/VectorUtils.cpp
+++ llvm/lib/Analysis/VectorUtils.cpp
@@ -708,7 +708,8 @@
 }
 
 /// \returns \p I after propagating metadata from \p VL.
-Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
+Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL,
+                                     bool RemoveNoAlias) {
   if (VL.empty())
     return Inst;
   Instruction *I0 = cast<Instruction>(VL[0]);
@@ -721,6 +722,8 @@
                     LLVMContext::MD_access_group}) {
     MDNode *MD = I0->getMetadata(Kind);
+    if (RemoveNoAlias && (Kind == LLVMContext::MD_noalias))
+      MD = nullptr;
 
     for (int J = 1, E = VL.size(); MD && J != E; ++J) {
       const Instruction *IJ = cast<Instruction>(VL[J]);
       MDNode *IMD = IJ->getMetadata(Kind);
@@ -746,7 +749,6 @@
         llvm_unreachable("unhandled metadata");
       }
     }
-
     Inst->setMetadata(Kind, MD);
   }
 
Index: llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -272,9 +272,10 @@
 
 // The real propagateMetadata expects a SmallVector<Value *>, but we deal in
 // vectors of Instructions.
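+// When RemoveNoAlias is set, the !noalias metadata of the combined access is
+// dropped instead of intersected (see propagateMetadata in VectorUtils.h).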
-static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
+static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL,
+                              bool RemoveNoAlias) {
   SmallVector<Value *, 8> VL(IL.begin(), IL.end());
-  propagateMetadata(I, VL);
+  propagateMetadata(I, VL, RemoveNoAlias);
 }
 
 // Vectorizer Implementation
@@ -1127,11 +1128,14 @@
     }
   }
 
+  bool HasProvenance = llvm::any_of(Chain, [](const auto &I) {
+    return cast<StoreInst>(I)->hasNoaliasProvenanceOperand();
+  });
   StoreInst *SI = Builder.CreateAlignedStore(
       Vec,
       Builder.CreateBitCast(S0->getPointerOperand(), VecTy->getPointerTo(AS)),
       Alignment);
-  propagateMetadata(SI, Chain);
+  propagateMetadata(SI, Chain, HasProvenance);
 
   eraseInstructions(Chain);
   ++NumVectorInstructions;
@@ -1249,11 +1253,14 @@
   std::tie(First, Last) = getBoundaryInstrs(Chain);
   Builder.SetInsertPoint(&*First);
 
+  bool HasProvenance = llvm::any_of(Chain, [](const auto &I) {
+    return cast<LoadInst>(I)->hasNoaliasProvenanceOperand();
+  });
   Value *Bitcast =
       Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
   LoadInst *LI =
       Builder.CreateAlignedLoad(VecTy, Bitcast, MaybeAlign(Alignment));
-  propagateMetadata(LI, Chain);
+  propagateMetadata(LI, Chain, HasProvenance);
 
   if (VecLoadTy) {
     SmallVector<Instruction *, 16> InstrsToErase;
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1170,7 +1170,13 @@
 
 void InnerLoopVectorizer::addMetadata(Instruction *To, Instruction *From) {
-  propagateMetadata(To, From);
+  bool HasProvenance = true;
+  if (auto *SI = dyn_cast<StoreInst>(From)) {
+    HasProvenance = SI->hasNoaliasProvenanceOperand();
+  } else if (auto *LI = dyn_cast<LoadInst>(From)) {
+    HasProvenance = LI->hasNoaliasProvenanceOperand();
+  }
+  propagateMetadata(To, From, HasProvenance);
   addNewMetadata(To, From);
 }
 
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1691,6 +1691,53 @@
     }
   }
 
+  /// Set the operands of this bundle of load or store instructions in their
+  /// original order.
+  void setLoadStoreOperandsInOrder() {
+    assert(Operands.empty() && "Already initialized?");
+    auto *I0 = cast<Instruction>(Scalars[0]);
+    assert((isa<LoadInst>(I0) || isa<StoreInst>(I0)) &&
+           "Expect a load or store instruction");
+    unsigned NumBaseOperands = isa<LoadInst>(I0) ? 1 : 2;
+
+    // Check if any instruction has a ptr_provenance operand.
+    bool HasProvenance = llvm::any_of(Scalars, [&](auto *V) {
+      return cast<Instruction>(V)->getNumOperands() != NumBaseOperands;
+    });
+
+    Operands.resize(NumBaseOperands + HasProvenance);
+    unsigned NumLanes = Scalars.size();
+    for (unsigned OpIdx = 0; OpIdx != NumBaseOperands; ++OpIdx) {
+      auto &Op = Operands[OpIdx];
+      Op.resize(NumLanes);
+      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+        auto *I = cast<Instruction>(Scalars[Lane]);
+        assert(((I->getNumOperands() == NumBaseOperands) ||
+                (I->getNumOperands() == NumBaseOperands + 1)) &&
+               "Expected same number of operands (ignoring the "
+               "ptr_provenance)");
+        Op[Lane] = I->getOperand(OpIdx);
+      }
+    }
+
+    if (HasProvenance) {
+      // At least one instruction has a ptr_provenance.
+      // Keep track of the dependencies brought in by it. Later on we will
+      // omit the noalias information.
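+      // Lanes without a ptr_provenance operand get an undef placeholder of
+      // the pointer operand's type below, so that every lane exposes the
+      // same number of operands.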
+      auto &Op = Operands[NumBaseOperands];
+      Op.resize(NumLanes);
+      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+        auto *I = cast<Instruction>(Scalars[Lane]);
+        if (I->getNumOperands() != NumBaseOperands) {
+          Op[Lane] = I->getOperand(NumBaseOperands);
+        } else {
+          Op[Lane] =
+              UndefValue::get(I->getOperand(NumBaseOperands - 1)->getType());
+        }
+      }
+    }
+  }
+
   /// \returns the \p OpIdx operand of this TreeEntry.
   ValueList &getOperand(unsigned OpIdx) {
     assert(OpIdx < Operands.size() && "Off bounds");
@@ -1910,7 +1957,7 @@
   /// Maps a specific scalar to its tree entry.
   SmallDenseMap<Value *, TreeEntry *> ScalarToTreeEntry;
 
-  /// Maps a value to the proposed vectorizable size. 
+  /// Maps a value to the proposed vectorizable size.
   SmallDenseMap<Value *, unsigned> InstrElementSize;
 
   /// A list of scalars that we found that we need to keep as scalars.
@@ -2234,7 +2281,8 @@
       auto *In = TE->getMainOp();
       assert(In &&
              (isa<ExtractValueInst>(In) || isa<ExtractElementInst>(In) ||
-              isa<InsertElementInst>(In) ||
+              isa<InsertElementInst>(In) || isa<LoadInst>(In) ||
+              isa<StoreInst>(In) ||
              In->getNumOperands() == TE->getNumOperands()) &&
             "Missed TreeEntry operands?");
       (void)In; // fake use to avoid build failure when assertions disabled
@@ -2971,14 +3019,14 @@
         ++NumOpsWantToKeepOriginalOrder;
         TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                      ReuseShuffleIndicies);
-        TE->setOperandsInOrder();
+        TE->setLoadStoreOperandsInOrder();
         LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
       } else {
         // Need to reorder.
         TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                      ReuseShuffleIndicies, CurrentOrder);
-        TE->setOperandsInOrder();
+        TE->setLoadStoreOperandsInOrder();
         LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
         findRootOrder(CurrentOrder);
         ++NumOpsWantToKeepOrder[CurrentOrder];
@@ -3246,14 +3294,14 @@
       ++NumOpsWantToKeepOriginalOrder;
       TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                    ReuseShuffleIndicies);
-      TE->setOperandsInOrder();
+      TE->setLoadStoreOperandsInOrder();
       buildTree_rec(Operands, Depth + 1, {TE, 0});
       LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
     } else {
       TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                    ReuseShuffleIndicies, CurrentOrder);
-      TE->setOperandsInOrder();
+      TE->setLoadStoreOperandsInOrder();
       buildTree_rec(Operands, Depth + 1, {TE, 0});
       LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n");
       findRootOrder(CurrentOrder);
@@ -4840,7 +4888,7 @@
       auto *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
       Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
       LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign());
-      Value *NewV = propagateMetadata(V, E->Scalars);
+      Value *NewV = propagateMetadata(V, E->Scalars, true);
       ShuffleBuilder.addInversedMask(E->ReorderIndices);
       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
       NewV = ShuffleBuilder.finalize(NewV);
@@ -5056,7 +5104,8 @@
           CommonAlignment =
               commonAlignment(CommonAlignment, cast<LoadInst>(V)->getAlign());
         NewLI = Builder.CreateMaskedGather(VecPtr, CommonAlignment);
       }
-      Value *V = propagateMetadata(NewLI, E->Scalars);
+      Value *V =
+          propagateMetadata(NewLI, E->Scalars, (E->getNumOperands() == 2));
 
       ShuffleBuilder.addInversedMask(E->ReorderIndices);
       ShuffleBuilder.addMask(E->ReuseShuffleIndices);
@@ -5089,7 +5138,7 @@
       if (getTreeEntry(ScalarPtr))
         ExternalUses.push_back(
             ExternalUser(ScalarPtr, cast<Instruction>(VecPtr), 0));
-      Value *V = propagateMetadata(ST, E->Scalars);
+      Value *V = propagateMetadata(ST, E->Scalars, (E->getNumOperands() == 3));
 
       E->VectorizedValue = V;
       ++NumVectorInstructions;
Index: llvm/test/Transforms/SLPVectorizer/noalias.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/noalias.ll
@@ -0,0 +1,55 @@
+; RUN: opt -S < %s -slp-vectorizer -slp-max-reg-size=128 -slp-min-reg-size=128 | FileCheck %s
+
+; SLP vectorization across @llvm.provenance.noalias and ptr_provenance
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.sideeffect() #0
+
+define void @test(float* %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[P1_DECL:%.*]] = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0)
+; CHECK-NEXT:    [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr float, float* [[P]], i64 1
+; CHECK-NEXT:    [[PROVENANCE_P1:%.*]] = tail call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* [[P1]], i8* [[P1_DECL]], float** null, float** undef, i32 0, metadata !0), !noalias !0
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr float, float* [[P]], i64 2
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr float, float* [[P]], i64 3
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[P0]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p1.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0)
+  %p0 = getelementptr float, float* %p, i64 0
+  %p1 = getelementptr float, float* %p, i64 1
+  %prov.p1 = tail call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %p1, i8* %p1.decl, float** null, float** undef, i32 0, metadata !0), !noalias !0
+  %p2 = getelementptr float, float* %p, i64 2
+  %p3 = getelementptr float, float* %p, i64 3
+  %l0 = load float, float* %p0, !noalias !0
+  %l1 = load float, float* %p1, ptr_provenance float* %prov.p1, !noalias !0
+  %l2 = load float, float* %p2, !noalias !0
+  call void @llvm.sideeffect()
+  %l3 = load float, float* %p3, !noalias !0
+  store float %l0, float* %p0, !noalias !0
+  call void @llvm.sideeffect()
+  store float %l1, float* %p1, ptr_provenance float* %prov.p1, !noalias !0
+  store float %l2, float* %p2, !noalias !0
+  store float %l3, float* %p3, !noalias !0
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float**, i32, metadata) #1
+
+; Function Attrs: nounwind readnone speculatable
+declare float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float*, i8*, float**, float**, i32, metadata) #2
+
+attributes #0 = { inaccessiblememonly nounwind willreturn }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind readnone speculatable }
+
+!0 = !{!1}
+!1 = distinct !{!1, !2, !"test_f: p"}
+!2 = distinct !{!2, !"test_f"}
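
Reviewer note, not part of the diff: a minimal sketch of how a vectorizer client is expected to use the extended propagateMetadata, mirroring the LoadStoreVectorizer hunks above. VecLoad and Chain are hypothetical stand-ins for a freshly created vector load and the scalar loads it replaces.

  // Sketch only; assumes the full-restrict ptr_provenance support is in tree.
  // A single vector instruction cannot carry per-lane provenance, so when any
  // lane has its own ptr_provenance operand we ask propagateMetadata to drop
  // !noalias instead of intersecting it.
  bool HasProvenance = llvm::any_of(Chain, [](Instruction *I) {
    return cast<LoadInst>(I)->hasNoaliasProvenanceOperand();
  });
  SmallVector<Value *, 8> Scalars(Chain.begin(), Chain.end());
  propagateMetadata(VecLoad, Scalars, /*RemoveNoAlias=*/HasProvenance);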