Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1264,6 +1264,28 @@
       }
     }
 
+    /// Set the operands of this load/store bundle in their original order.
+    void setLoadStoreOperandsInOrder() {
+      assert(Operands.empty() && "Already initialized?");
+      auto *I0 = cast<Instruction>(Scalars[0]);
+      assert((isa<LoadInst>(I0) || isa<StoreInst>(I0)) &&
+             "Expect a load or store instruction");
+      unsigned NumOperands = isa<LoadInst>(I0) ? 1 : 2;
+      Operands.resize(NumOperands);
+      unsigned NumLanes = Scalars.size();
+      for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
+        Operands[OpIdx].resize(NumLanes);
+        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+          auto *I = cast<Instruction>(Scalars[Lane]);
+          assert(((I->getNumOperands() == NumOperands) ||
+                  (I->getNumOperands() == NumOperands + 1)) &&
+                 "Expected same number of operands (ignoring the "
+                 "noalias_sidechannel)");
+          Operands[OpIdx][Lane] = I->getOperand(OpIdx);
+        }
+      }
+    }
+
     /// \returns the \p OpIdx operand of this TreeEntry.
     ValueList &getOperand(unsigned OpIdx) {
       assert(OpIdx < Operands.size() && "Off bounds");
@@ -2445,7 +2467,7 @@
         ++NumOpsWantToKeepOriginalOrder;
         TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                      ReuseShuffleIndicies);
-        TE->setOperandsInOrder();
+        TE->setLoadStoreOperandsInOrder();
         LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
       } else {
         // Need to reorder.
@@ -2454,7 +2476,7 @@
         TreeEntry *TE =
             newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                          ReuseShuffleIndicies, I->getFirst());
-        TE->setOperandsInOrder();
+        TE->setLoadStoreOperandsInOrder();
         LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
       }
       return;
@@ -2672,7 +2694,7 @@
       ValueList Operands;
       for (Value *V : VL)
         Operands.push_back(cast<StoreInst>(V)->getOperand(0));
-      TE->setOperandsInOrder();
+      TE->setLoadStoreOperandsInOrder();
       buildTree_rec(Operands, Depth + 1, {TE, 0});
       return;
     }
Index: llvm/test/Transforms/SLPVectorizer/noalias.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SLPVectorizer/noalias.ll
@@ -0,0 +1,55 @@
+; RUN: opt -S < %s -slp-vectorizer -slp-max-reg-size=128 -slp-min-reg-size=128 | FileCheck %s
+
+; SLP vectorization across @llvm.sideeffect calls and a @llvm.side.noalias side channel
+
+; Function Attrs: inaccessiblememonly nounwind willreturn
+declare void @llvm.sideeffect() #0
+
+define void @test(float* %p) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:    [[P1_DECL:%.*]] = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0)
+; CHECK-NEXT:    [[P0:%.*]] = getelementptr float, float* [[P:%.*]], i64 0
+; CHECK-NEXT:    [[P1:%.*]] = getelementptr float, float* [[P]], i64 1
+; CHECK-NEXT:    [[SIDE_P1:%.*]] = tail call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* [[P1]], i8* [[P1_DECL]], float** null, float** undef, i32 0, metadata !0), !noalias !0
+; CHECK-NEXT:    [[P2:%.*]] = getelementptr float, float* [[P]], i64 2
+; CHECK-NEXT:    [[P3:%.*]] = getelementptr float, float* [[P]], i64 3
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float* [[P0]] to <4 x float>*
+; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4
+; CHECK-NEXT:    call void @llvm.sideeffect()
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[P0]] to <4 x float>*
+; CHECK-NEXT:    store <4 x float> [[TMP2]], <4 x float>* [[TMP3]], align 4
+; CHECK-NEXT:    ret void
+;
+  %p1.decl = tail call i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float** null, i32 0, metadata !0)
+  %p0 = getelementptr float, float* %p, i64 0
+  %p1 = getelementptr float, float* %p, i64 1
+  %side.p1 = tail call float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float* %p1, i8* %p1.decl, float** null, float** undef, i32 0, metadata !0), !noalias !0
+  %p2 = getelementptr float, float* %p, i64 2
+  %p3 = getelementptr float, float* %p, i64 3
+  %l0 = load float, float* %p0, !noalias !0
+  %l1 = load float, float* %p1, noalias_sidechannel float* %side.p1, !noalias !0
+  %l2 = load float, float* %p2, !noalias !0
+  call void @llvm.sideeffect()
+  %l3 = load float, float* %p3, !noalias !0
+  store float %l0, float* %p0, !noalias !0
+  call void @llvm.sideeffect()
+  store float %l1, float* %p1, noalias_sidechannel float* %side.p1, !noalias !0
+  store float %l2, float* %p2, !noalias !0
+  store float %l3, float* %p3, !noalias !0
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare i8* @llvm.noalias.decl.p0i8.p0p0f32.i32(float**, i32, metadata) #1
+
+; Function Attrs: nounwind readnone speculatable
+declare float* @llvm.side.noalias.p0f32.p0i8.p0p0f32.p0p0f32.i32(float*, i8*, float**, float**, i32, metadata) #2
+
+attributes #0 = { inaccessiblememonly nounwind willreturn }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { nounwind readnone speculatable }
+
+!0 = !{!1}
+!1 = distinct !{!1, !2, !"test_f: p"}
+!2 = distinct !{!2, !"test_f"}