Index: llvm/include/llvm/Analysis/VectorUtils.h
===================================================================
--- llvm/include/llvm/Analysis/VectorUtils.h
+++ llvm/include/llvm/Analysis/VectorUtils.h
@@ -255,8 +255,11 @@
 /// metadata value that covers all of the individual values), and set I's
 /// metadata for M equal to the intersection value.
 ///
+/// When RemoveNoAlias is true, MD_noalias is always set to a null value.
+///
 /// This function always sets a (possibly null) value for each K in Kinds.
-Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL);
+Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL,
+                               bool RemoveNoAlias = true);
 
 /// Create a mask that filters the members of an interleave group where there
 /// are gaps.
Index: llvm/lib/Analysis/VectorUtils.cpp
===================================================================
--- llvm/lib/Analysis/VectorUtils.cpp
+++ llvm/lib/Analysis/VectorUtils.cpp
@@ -592,7 +592,8 @@
 }
 
 /// \returns \p I after propagating metadata from \p VL.
-Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
+Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL,
+                                     bool RemoveNoAlias) {
   Instruction *I0 = cast<Instruction>(VL[0]);
   SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
   I0->getAllMetadataOtherThanDebugLoc(Metadata);
@@ -603,6 +604,8 @@
                     LLVMContext::MD_access_group}) {
     MDNode *MD = I0->getMetadata(Kind);
 
+    if (RemoveNoAlias && (Kind == LLVMContext::MD_noalias))
+      MD = nullptr;
     for (int J = 1, E = VL.size(); MD && J != E; ++J) {
       const Instruction *IJ = cast<Instruction>(VL[J]);
       MDNode *IMD = IJ->getMetadata(Kind);
@@ -628,7 +631,6 @@
         llvm_unreachable("unhandled metadata");
       }
     }
-
     Inst->setMetadata(Kind, MD);
   }
Index: llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -280,9 +280,10 @@
 // The real propagateMetadata expects a SmallVector<Value *>, but we deal in
 // vectors of Instructions.
-static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
+static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL,
+                              bool RemoveNoAlias) {
   SmallVector<Value *, 8> VL(IL.begin(), IL.end());
-  propagateMetadata(I, VL);
+  propagateMetadata(I, VL, RemoveNoAlias);
 }
 
 // Vectorizer Implementation
@@ -1074,11 +1075,15 @@
     }
   }
 
+  bool HasSideChannel =
+      std::any_of(Chain.begin(), Chain.end(), [](const auto &I) {
+        return cast<StoreInst>(I)->hasNoaliasSideChannelOperand();
+      });
   StoreInst *SI = Builder.CreateAlignedStore(
       Vec,
       Builder.CreateBitCast(S0->getPointerOperand(), VecTy->getPointerTo(AS)),
       Alignment);
-  propagateMetadata(SI, Chain);
+  propagateMetadata(SI, Chain, HasSideChannel);
   eraseInstructions(Chain);
   ++NumVectorInstructions;
@@ -1191,10 +1196,14 @@
   std::tie(First, Last) = getBoundaryInstrs(Chain);
   Builder.SetInsertPoint(&*First);
 
+  bool HasSideChannel =
+      std::any_of(Chain.begin(), Chain.end(), [](const auto &I) {
+        return cast<LoadInst>(I)->hasNoaliasSideChannelOperand();
+      });
   Value *Bitcast =
       Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
   LoadInst *LI = Builder.CreateAlignedLoad(VecTy, Bitcast, Alignment);
-  propagateMetadata(LI, Chain);
+  propagateMetadata(LI, Chain, HasSideChannel);
 
   if (VecLoadTy) {
     SmallVector<Instruction *, 16> InstrsToErase;
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -907,7 +907,13 @@
 void InnerLoopVectorizer::addMetadata(Instruction *To,
                                       Instruction *From) {
-  propagateMetadata(To, From);
+  bool HasSideChannel = true;
+  if (auto *SI = dyn_cast<StoreInst>(From)) {
+    HasSideChannel = SI->hasNoaliasSideChannelOperand();
+  } else if (auto *LI = dyn_cast<LoadInst>(From)) {
+    HasSideChannel = LI->hasNoaliasSideChannelOperand();
+  }
+  propagateMetadata(To, From, HasSideChannel);
   addNewMetadata(To, From);
 }
Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1273,6 +1273,53 @@
     }
   }
 
+  /// Set the operands of this bundle of load or store instructions in their
+  /// original order.
+  void setLoadStoreOperandsInOrder() {
+    assert(Operands.empty() && "Already initialized?");
+    auto *I0 = cast<Instruction>(Scalars[0]);
+    assert((isa<LoadInst>(I0) || isa<StoreInst>(I0)) &&
+           "Expect a load or store instruction");
+    unsigned NumOperands = isa<LoadInst>(I0) ? 1 : 2;
+
+    // Check if any instruction has a noalias_sidechannel operand.
+    bool HasSideChannel =
+        std::any_of(Scalars.begin(), Scalars.end(), [&](auto *V) {
+          return cast<Instruction>(V)->getNumOperands() != NumOperands;
+        });
+
+    Operands.resize(NumOperands + HasSideChannel);
+    unsigned NumLanes = Scalars.size();
+    for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+      Operands[OpIdx].resize(NumLanes);
+      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+        auto *I = cast<Instruction>(Scalars[Lane]);
+        assert(((I->getNumOperands() == NumOperands) ||
+                (I->getNumOperands() == NumOperands + 1)) &&
+               "Expected same number of operands (ignoring the "
+               "noalias_sidechannel)");
+        Operands[OpIdx][Lane] = I->getOperand(OpIdx);
+      }
+    }
+
+    if (HasSideChannel) {
+      // At least one instruction has a noalias_sidechannel.
+      // Keep track of the dependencies brought in by it. Later on we will
+      // omit the noalias information.
+      unsigned OpIdx = NumOperands;
+      Operands[OpIdx].resize(NumLanes);
+      for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+        auto *I = cast<Instruction>(Scalars[Lane]);
+        if (I->getNumOperands() != NumOperands) {
+          Operands[OpIdx][Lane] = I->getOperand(OpIdx);
+        } else {
+          Operands[OpIdx][Lane] =
+              UndefValue::get(I->getOperand(OpIdx - 1)->getType());
+        }
+      }
+    }
+  }
+
   /// \returns the \p OpIdx operand of this TreeEntry.
   ValueList &getOperand(unsigned OpIdx) {
     assert(OpIdx < Operands.size() && "Off bounds");
@@ -2454,7 +2501,7 @@
           ++NumOpsWantToKeepOriginalOrder;
           TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S,
                                        UserTreeIdx, ReuseShuffleIndicies);
-          TE->setOperandsInOrder();
+          TE->setLoadStoreOperandsInOrder();
           LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
         } else {
           // Need to reorder.
@@ -2463,7 +2510,7 @@
           TreeEntry *TE =
               newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                            ReuseShuffleIndicies, I->getFirst());
-          TE->setOperandsInOrder();
+          TE->setLoadStoreOperandsInOrder();
           LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
         }
         return;
@@ -2681,7 +2728,7 @@
       ValueList Operands;
       for (Value *V : VL)
         Operands.push_back(cast<Instruction>(V)->getOperand(0));
-      TE->setOperandsInOrder();
+      TE->setLoadStoreOperandsInOrder();
       buildTree_rec(Operands, Depth + 1, {TE, 0});
       return;
     }
@@ -3829,7 +3876,7 @@
       PointerType *PtrTy = PointerType::get(VecTy, LI->getPointerAddressSpace());
       Value *Ptr = Builder.CreateBitCast(LI->getOperand(0), PtrTy);
       LoadInst *V = Builder.CreateAlignedLoad(VecTy, Ptr, LI->getAlignment());
-      Value *NewV = propagateMetadata(V, E->Scalars);
+      Value *NewV = propagateMetadata(V, E->Scalars, true);
       if (!E->ReorderIndices.empty()) {
         OrdersType Mask;
         inversePermutation(E->ReorderIndices, Mask);
@@ -4033,7 +4080,7 @@
       if (!Alignment)
         Alignment = MaybeAlign(DL->getABITypeAlignment(ScalarLoadTy));
       LI->setAlignment(Alignment);
-      Value *V = propagateMetadata(LI, E->Scalars);
+      Value *V = propagateMetadata(LI, E->Scalars, (E->getNumOperands() == 2));
       if (IsReorder) {
         OrdersType Mask;
         inversePermutation(E->ReorderIndices, Mask);
@@ -4071,7 +4118,7 @@
         Alignment = DL->getABITypeAlignment(SI->getValueOperand()->getType());
       ST->setAlignment(Align(Alignment));
-      Value *V = propagateMetadata(ST, E->Scalars);
+      Value *V = propagateMetadata(ST, E->Scalars, (E->getNumOperands() == 3));
       if (NeedToShuffleReuses) {
         V = Builder.CreateShuffleVector(V, UndefValue::get(VecTy),
                                         E->ReuseShuffleIndices, "shuffle");
Index: llvm/test/Transforms/SLPVectorizer/noalias.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/noalias.ll
@@ -0,0 +1,55 @@
+; RUN: opt -S < %s -slp-vectorizer -slp-max-reg-size=128 -slp-min-reg-size=128 | FileCheck %s
+
+; Check that SLP vectorization still happens across @llvm.side.noalias calls
+; and loads/stores carrying a noalias_sidechannel operand.
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.sideeffect() #0
+
+define void @test(float* %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[P1_DECL:%.*]] = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0)
+; CHECK-NEXT:    [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr float, float* [[P]], i64 1
+; CHECK-NEXT:    [[SIDE_P1:%.*]] = tail call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* [[P1]], i8* [[P1_DECL]], float** null, float** undef, i32 0, metadata !0), !noalias !0
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr float, float* [[P]], i64 2
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr float, float* [[P]], i64 3
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[P0]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p1.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0)
+  %p0 = getelementptr float, float* %p, i64 0
+  %p1 = getelementptr float, float* %p, i64 1
+  %side.p1 = tail call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %p1, i8* %p1.decl, float** null, float** undef, i32 0, metadata !0), !noalias !0
+  %p2 = getelementptr float, float* %p, i64 2
+  %p3 = getelementptr float, float* %p, i64 3
+  %l0 = load float, float* %p0, !noalias !0
+  %l1 = load float, float* %p1, noalias_sidechannel float* %side.p1, !noalias !0
+  %l2 = load float, float* %p2, !noalias !0
+  call void @llvm.sideeffect()
+  %l3 = load float, float* %p3, !noalias !0
+  store float %l0, float* %p0, !noalias !0
+  call void @llvm.sideeffect()
+  store float %l1, float* %p1, noalias_sidechannel float* %side.p1, !noalias !0
+  store float %l2, float* %p2, !noalias !0
+  store float %l3, float* %p3, !noalias !0
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float**, i32, metadata) #1
+
+; Function Attrs: nounwind readnone speculatable
+declare float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float*, i8*, float**, float**, i32, metadata) #2
+
+attributes #0 = { inaccessiblememonly nounwind willreturn }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind readnone speculatable }
+
+!0 = !{!1}
+!1 = distinct !{!1, !2, !"test_f: p"}
+!2 = distinct !{!2, !"test_f"}
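
Note for out-of-tree users of the new parameter: callers are expected to pass RemoveNoAlias = true whenever any instruction in the bundle carries a noalias_sidechannel operand, mirroring the HasSideChannel checks added in the vectorizers above. The following is only a minimal sketch of that calling pattern, not part of the patch; the helpers shouldDropNoAlias and copyBundleMetadata are hypothetical names, and the sketch assumes the separate noalias_sidechannel patches (which add LoadInst/StoreInst::hasNoaliasSideChannelOperand()) are applied.

// Sketch only: mirrors the convention used by the vectorizer changes above.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

// Hypothetical helper: true when any load/store in the bundle carries a
// noalias_sidechannel operand, in which case MD_noalias must be dropped.
static bool shouldDropNoAlias(ArrayRef<Value *> Bundle) {
  return any_of(Bundle, [](Value *V) {
    if (auto *LI = dyn_cast<LoadInst>(V))
      return LI->hasNoaliasSideChannelOperand();
    if (auto *SI = dyn_cast<StoreInst>(V))
      return SI->hasNoaliasSideChannelOperand();
    return false;
  });
}

// Hypothetical helper: NewI is the vectorized replacement for Bundle.
static void copyBundleMetadata(Instruction *NewI, ArrayRef<Value *> Bundle) {
  propagateMetadata(NewI, Bundle, /*RemoveNoAlias=*/shouldDropNoAlias(Bundle));
}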