Index: llvm/include/llvm/Analysis/VectorUtils.h =================================================================== --- llvm/include/llvm/Analysis/VectorUtils.h +++ llvm/include/llvm/Analysis/VectorUtils.h @@ -456,8 +456,11 @@ /// metadata value that covers all of the individual values), and set I's /// metadata for M equal to the intersection value. /// +/// When RemoveNoAlias is true, MD_noalias will always get a null value. +/// /// This function always sets a (possibly null) value for each K in Kinds. -Instruction *propagateMetadata(Instruction *I, ArrayRef VL); +Instruction *propagateMetadata(Instruction *I, ArrayRef VL, + bool RemoveNoAlias = true); /// Create a mask that filters the members of an interleave group where there /// are gaps. Index: llvm/lib/Analysis/VectorUtils.cpp =================================================================== --- llvm/lib/Analysis/VectorUtils.cpp +++ llvm/lib/Analysis/VectorUtils.cpp @@ -702,7 +702,8 @@ } /// \returns \p I after propagating metadata from \p VL. -Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef VL) { +Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef VL, + bool RemoveNoAlias) { Instruction *I0 = cast(VL[0]); SmallVector, 4> Metadata; I0->getAllMetadataOtherThanDebugLoc(Metadata); @@ -713,6 +714,8 @@ LLVMContext::MD_access_group}) { MDNode *MD = I0->getMetadata(Kind); + if (RemoveNoAlias && (Kind == LLVMContext::MD_noalias)) + MD = nullptr; for (int J = 1, E = VL.size(); MD && J != E; ++J) { const Instruction *IJ = cast(VL[J]); MDNode *IMD = IJ->getMetadata(Kind); @@ -738,7 +741,6 @@ llvm_unreachable("unhandled metadata"); } } - Inst->setMetadata(Kind, MD); } Index: llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp @@ -264,9 +264,10 @@ // The real propagateMetadata expects a SmallVector, but we deal in // vectors of Instructions. -static void propagateMetadata(Instruction *I, ArrayRef IL) { +static void propagateMetadata(Instruction *I, ArrayRef IL, + bool RemoveNoAlias) { SmallVector VL(IL.begin(), IL.end()); - propagateMetadata(I, VL); + propagateMetadata(I, VL, RemoveNoAlias); } // Vectorizer Implementation @@ -1115,11 +1116,15 @@ } } + bool HasProvenance = + std::any_of(Chain.begin(), Chain.end(), [](const auto &I) { + return cast(I)->hasNoaliasProvenanceOperand(); + }); StoreInst *SI = Builder.CreateAlignedStore( Vec, Builder.CreateBitCast(S0->getPointerOperand(), VecTy->getPointerTo(AS)), Alignment); - propagateMetadata(SI, Chain); + propagateMetadata(SI, Chain, HasProvenance); eraseInstructions(Chain); ++NumVectorInstructions; @@ -1237,11 +1242,15 @@ std::tie(First, Last) = getBoundaryInstrs(Chain); Builder.SetInsertPoint(&*First); + bool HasProvenance = + std::any_of(Chain.begin(), Chain.end(), [](const auto &I) { + return cast(I)->hasNoaliasProvenanceOperand(); + }); Value *Bitcast = Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS)); LoadInst *LI = Builder.CreateAlignedLoad(VecTy, Bitcast, MaybeAlign(Alignment)); - propagateMetadata(LI, Chain); + propagateMetadata(LI, Chain, HasProvenance); if (VecLoadTy) { SmallVector InstrsToErase; Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -921,7 +921,13 @@ void InnerLoopVectorizer::addMetadata(Instruction *To, Instruction *From) { - propagateMetadata(To, From); + bool HasProvenance = true; + if (auto *SI = dyn_cast(From)) { + HasProvenance = SI->hasNoaliasProvenanceOperand(); + } else if (auto *LI = dyn_cast(From)) { + HasProvenance = LI->hasNoaliasProvenanceOperand(); + } + propagateMetadata(To, From, HasProvenance); addNewMetadata(To, From); } Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1521,6 +1521,53 @@ } } + /// Set the operands of this bundle of load or store instructions in their + /// original order. + void setLoadStoreOperandsInOrder() { + assert(Operands.empty() && "Already initialized?"); + auto *I0 = cast(Scalars[0]); + assert((isa(I0) || isa(I0)) && + "Expect a load or store instruction"); + unsigned NumOperands = isa(I0) ? 1 : 2; + + // Check if any instruction has a ptr_provenance + bool HasProvenance = + std::any_of(Scalars.begin(), Scalars.end(), [&](auto *V) { + return cast(V)->getNumOperands() != NumOperands; + }); + + Operands.resize(NumOperands + HasProvenance); + unsigned NumLanes = Scalars.size(); + for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { + Operands[OpIdx].resize(NumLanes); + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + auto *I = cast(Scalars[Lane]); + assert(((I->getNumOperands() == NumOperands) || + (I->getNumOperands() == NumOperands + 1)) && + "Expected same number of operands (ignoring the " + "ptr_provenance"); + Operands[OpIdx][Lane] = I->getOperand(OpIdx); + } + } + + if (HasProvenance) { + // At least one instruction has a ptr_provenance. + // Keep track of the dependencies brought in by it. Later on we will + // omit the noalias information. + unsigned OpIdx = NumOperands; + Operands[OpIdx].resize(NumLanes); + for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { + auto *I = cast(Scalars[Lane]); + if (I->getNumOperands() != NumOperands) { + Operands[OpIdx][Lane] = I->getOperand(OpIdx); + } else { + Operands[OpIdx][Lane] = + UndefValue::get(I->getOperand(OpIdx - 1)->getType()); + } + } + } + } + /// \returns the \p OpIdx operand of this TreeEntry. ValueList &getOperand(unsigned OpIdx) { assert(OpIdx < Operands.size() && "Off bounds"); @@ -2040,6 +2087,7 @@ auto *In = TE->getMainOp(); assert(In && (isa(In) || isa(In) || + isa(In) || isa(In) || In->getNumOperands() == TE->getNumOperands()) && "Missed TreeEntry operands?"); (void)In; // fake use to avoid build failure when assertions disabled @@ -2735,7 +2783,7 @@ ++NumOpsWantToKeepOriginalOrder; TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); - TE->setOperandsInOrder(); + TE->setLoadStoreOperandsInOrder(); LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n"); } else { // Need to reorder. @@ -2744,7 +2792,7 @@ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies, I->getFirst()); - TE->setOperandsInOrder(); + TE->setLoadStoreOperandsInOrder(); LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n"); } return; @@ -2997,7 +3045,7 @@ ++NumOpsWantToKeepOriginalOrder; TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); - TE->setOperandsInOrder(); + TE->setLoadStoreOperandsInOrder(); buildTree_rec(Operands, Depth + 1, {TE, 0}); LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n"); } else { @@ -3007,7 +3055,7 @@ TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies, I->getFirst()); - TE->setOperandsInOrder(); + TE->setLoadStoreOperandsInOrder(); buildTree_rec(Operands, Depth + 1, {TE, 0}); LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled stores.\n"); } @@ -4228,7 +4276,7 @@ PointerType::get(VecTy, LI->getPointerAddressSpace()); Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy); LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlign()); - Value *NewV = propagateMetadata(V, E->Scalars); + Value *NewV = propagateMetadata(V, E->Scalars, true); if (!E->ReorderIndices.empty()) { SmallVector Mask; inversePermutation(E->ReorderIndices, Mask); @@ -4409,7 +4457,7 @@ ExternalUses.push_back(ExternalUser(PO, cast(VecPtr), 0)); LI = Builder.CreateAlignedLoad(VecTy, VecPtr, LI->getAlign()); - Value *V = propagateMetadata(LI, E->Scalars); + Value *V = propagateMetadata(LI, E->Scalars, (E->getNumOperands() == 2)); if (IsReorder) { SmallVector Mask; inversePermutation(E->ReorderIndices, Mask); @@ -4453,7 +4501,7 @@ if (getTreeEntry(ScalarPtr)) ExternalUses.push_back(ExternalUser(ScalarPtr, cast(VecPtr), 0)); - Value *V = propagateMetadata(ST, E->Scalars); + Value *V = propagateMetadata(ST, E->Scalars, (E->getNumOperands() == 3)); if (NeedToShuffleReuses) { V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy), E->ReuseShuffleIndices, "shuffle"); Index: llvm/test/Transforms/SLPVectorizer/noalias.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/SLPVectorizer/noalias.ll @@ -0,0 +1,55 @@ +; RUN: opt -S < %s -slp-vectorizer -slp-max-reg-size=128 -slp-min-reg-size=128 | FileCheck %s + +; SLP vectorization across a @llvm.provenance.noalias and provenance + +; Function Attrs: inaccessiblememonly nounwind willreturn +declare void @llvm.sideeffect() #0 + +define void @test(float* %p) { +; CHECK-LABEL: @test( +; CHECK-NEXT: [[P1_DECL:%.*]] = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0) +; CHECK-NEXT: [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0 +; CHECK-NEXT: [[P1:%.*]] = getelementptr float, float* [[P]], i64 1 +; CHECK-NEXT: [[PROVENANCE_P1:%.*]] = tail call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* [[P1]], i8* [[P1_DECL]], float** null, float** undef, i32 0, metadata !0), !noalias !0 +; CHECK-NEXT: [[P2:%.*]] = getelementptr float, float* [[P]], i64 2 +; CHECK-NEXT: [[P3:%.*]] = getelementptr float, float* [[P]], i64 3 +; CHECK-NEXT: call void @llvm.sideeffect() +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>* +; CHECK-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: call void @llvm.sideeffect() +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[P0]] to <4 x float>* +; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: ret void +; + %p1.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0) + %p0 = getelementptr float, float* %p, i64 0 + %p1 = getelementptr float, float* %p, i64 1 + %prov.p1 = tail call float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %p1, i8* %p1.decl, float** null, float** undef, i32 0, metadata !0), !noalias !0 + %p2 = getelementptr float, float* %p, i64 2 + %p3 = getelementptr float, float* %p, i64 3 + %l0 = load float, float* %p0, !noalias !0 + %l1 = load float, float* %p1, ptr_provenance float* %prov.p1, !noalias !0 + %l2 = load float, float* %p2, !noalias !0 + call void @llvm.sideeffect() + %l3 = load float, float* %p3, !noalias !0 + store float %l0, float* %p0, !noalias !0 + call void @llvm.sideeffect() + store float %l1, float* %p1, ptr_provenance float* %prov.p1, !noalias !0 + store float %l2, float* %p2, !noalias !0 + store float %l3, float* %p3, !noalias !0 + ret void +} + +; Function Attrs: argmemonly nounwind +declare i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float**, i32, metadata) #1 + +; Function Attrs: nounwind readnone speculatable +declare float* @llvm.provenance.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float*, i8*, float**, float**, i32, metadata) #2 + +attributes #0 = { inaccessiblememonly nounwind willreturn } +attributes #1 = { argmemonly nounwind } +attributes #2 = { nounwind readnone speculatable } + +!0 = !{!1} +!1 = distinct !{!1, !2, !"test_f: p"} +!2 = distinct !{!2, !"test_f"}