Index: llvm/include/llvm/Analysis/LoopAccessAnalysis.h
===================================================================
--- llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -516,7 +516,8 @@
 class LoopAccessInfo {
 public:
   LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI,
-                 AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI);
+                 const TargetTransformInfo *TTI, AliasAnalysis *AA,
+                 DominatorTree *DT, LoopInfo *LI);
 
   /// Return true we can analyze the memory accesses in the loop and there are
   /// no memory dependence cycles.
@@ -608,7 +609,8 @@
 private:
   /// Analyze the loop.
   void analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
-                   const TargetLibraryInfo *TLI, DominatorTree *DT);
+                   const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI,
+                   DominatorTree *DT);
 
   /// Check if the structure of the loop allows it to be analyzed by this
   /// pass.
@@ -626,7 +628,8 @@
   ///
   /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
   /// invariant.
-  void collectStridedAccess(Value *LoadOrStoreInst);
+  void collectStridedAccess(Value *LoadOrStoreInst,
+                            const TargetTransformInfo *TTI);
 
   std::unique_ptr<PredicatedScalarEvolution> PSE;
 
@@ -750,6 +753,7 @@
   // The used analysis passes.
   ScalarEvolution *SE = nullptr;
   const TargetLibraryInfo *TLI = nullptr;
+  const TargetTransformInfo *TTI = nullptr;
   AliasAnalysis *AA = nullptr;
   DominatorTree *DT = nullptr;
   LoopInfo *LI = nullptr;
Index: llvm/include/llvm/Analysis/TargetTransformInfo.h
===================================================================
--- llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -606,6 +606,10 @@
   /// Return true if the target supports masked expand load.
   bool isLegalMaskedExpandLoad(Type *DataType) const;
 
+  /// Returns true if the target machine can represent a vectorized version
+  /// of \p V as a masked gather or scatter operation.
+  bool isLegalGatherOrScatter(Value *V) const;
+
   /// Return true if the target has a unified operation to calculate division
   /// and remainder. If so, the additional implicit multiplication and
   /// subtraction required to calculate a remainder from division are free. This
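The new hook takes the scalar memory instruction itself rather than a (type, alignment) pair, so a client can query a load or store directly. As a rough illustration of how a caller might use it (this helper is a sketch written for this review, not code from the patch):

  // Sketch: collect the accesses in a block that the target could widen to a
  // masked gather (loads) or masked scatter (stores).
  static SmallVector<Instruction *, 8>
  collectWidenableAccesses(BasicBlock &BB, const TargetTransformInfo &TTI) {
    SmallVector<Instruction *, 8> Widenable;
    for (Instruction &I : BB)
      if ((isa<LoadInst>(&I) || isa<StoreInst>(&I)) &&
          TTI.isLegalGatherOrScatter(&I))
        Widenable.push_back(&I);
    return Widenable;
  }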
Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -124,6 +124,10 @@
     "enable-mem-access-versioning", cl::init(true), cl::Hidden,
     cl::desc("Enable symbolic stride memory access versioning"));
 
+static cl::opt<bool> PreferGatherOverStrideCheck(
+    "prefer-gather-over-stride-check", cl::init(true), cl::Hidden,
+    cl::desc("Prefer Gather/Scatter over symbolic stride versioning"));
+
 /// Enable store-to-load forwarding conflict detection. This option can
 /// be disabled for correctness testing.
 static cl::opt<bool> EnableForwardingConflictDetection(
@@ -1789,6 +1793,7 @@
 
 void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
                                  const TargetLibraryInfo *TLI,
+                                 const TargetTransformInfo *TTI,
                                  DominatorTree *DT) {
   typedef SmallPtrSet<Value *, 16> ValueSet;
 
@@ -1866,7 +1871,7 @@
       Loads.push_back(Ld);
       DepChecker->addAccess(Ld);
       if (EnableMemAccessVersioning)
-        collectStridedAccess(Ld);
+        collectStridedAccess(Ld, TTI);
       continue;
     }
 
@@ -1890,7 +1895,7 @@
       Stores.push_back(St);
      DepChecker->addAccess(St);
       if (EnableMemAccessVersioning)
-        collectStridedAccess(St);
+        collectStridedAccess(St, TTI);
     }
   } // Next instr.
 } // Next block.
@@ -2279,7 +2284,8 @@
   return addRuntimeChecks(Loc, PtrRtChecking->getChecks());
 }
 
-void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
+void LoopAccessInfo::collectStridedAccess(Value *MemAccess,
+                                          const TargetTransformInfo *TTI) {
   Value *Ptr = nullptr;
   if (LoadInst *LI = dyn_cast<LoadInst>(MemAccess))
     Ptr = LI->getPointerOperand();
@@ -2296,18 +2302,28 @@
                        "versioning:");
   LLVM_DEBUG(dbgs() << "  Ptr: " << *Ptr << " Stride: " << *Stride << "\n");
 
-  // Avoid adding the "Stride == 1" predicate when we know that
-  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
-  // or zero iteration loop, as Trip-Count <= Stride == 1.
+  // If this load/store could equally be represented as a gather/scatter, as
+  // opposed to adding a unit stride runtime check, the gather/scatter is
+  // likely to be useful in more cases (even if it might be slower than a
+  // sequential load).
   //
   // TODO: We are currently not making a very informed decision on when it is
   // beneficial to apply stride versioning. It might make more sense that the
   // users of this analysis (such as the vectorizer) will trigger it, based on
   // their specific cost considerations; For example, in cases where stride
-  // versioning does not help resolving memory accesses/dependences, the
+  // versioning does not help resolving memory accesses/dependences, the
   // vectorizer should evaluate the cost of the runtime test, and the benefit
   // of various possible stride specializations, considering the alternatives
   // of using gather/scatters (if available).
+  if (PreferGatherOverStrideCheck && TTI &&
+      TTI->isLegalGatherOrScatter(MemAccess)) {
+    LLVM_DEBUG(dbgs() << "LAA: But leaving as a gather/scatter instead.\n");
+    return;
+  }
+
+  // Avoid adding the "Stride == 1" predicate when we know that
+  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
+  // or zero iteration loop, as Trip-Count <= Stride == 1.
 
   const SCEV *StrideExpr = PSE->getSCEV(Stride);
   const SCEV *BETakenCount = PSE->getBackedgeTakenCount();
@@ -2343,8 +2359,10 @@
 }
 
 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
-                               const TargetLibraryInfo *TLI, AliasAnalysis *AA,
-                               DominatorTree *DT, LoopInfo *LI)
+                               const TargetLibraryInfo *TLI,
+                               const TargetTransformInfo *TTI,
+                               AliasAnalysis *AA, DominatorTree *DT,
+                               LoopInfo *LI)
     : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
       PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
       DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
@@ -2352,7 +2370,7 @@
       HasConvergentOp(false),
       HasDependenceInvolvingLoopInvariantAddress(false) {
   if (canAnalyzeLoop())
-    analyzeLoop(AA, LI, TLI, DT);
+    analyzeLoop(AA, LI, TLI, TTI, DT);
 }
 
 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
@@ -2406,7 +2424,7 @@
   auto &LAI = LoopAccessInfoMap[L];
 
   if (!LAI)
-    LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);
+    LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, TTI, AA, DT, LI);
 
   return *LAI.get();
 }
@@ -2426,6 +2444,8 @@
   SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
   auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
   TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+  auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
+  TTI = TTIP ? &TTIP->getTTI(F) : nullptr;
   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
@@ -2457,7 +2477,7 @@
 
 LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
                                        LoopStandardAnalysisResults &AR) {
-  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
+  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.TTI, &AR.AA, &AR.DT, &AR.LI);
 }
 
 namespace llvm {
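To make the trade-off in collectStridedAccess() concrete: the affected pattern is a loop whose stride is only known symbolically, as in the hand-written example below (illustrative only; it mirrors the @scev4stride1_32 test added further down, and the function name is made up):

  // b[i * k] has a symbolic stride k. LAA used to emit a runtime "k == 1"
  // check and version the loop; if the target has a legal masked gather for
  // this access, the access is now left to be widened as a gather and no
  // stride check is added.
  void copy_strided(int *__restrict a, const int *__restrict b, int k) {
    for (int i = 0; i < 256; ++i)
      a[i] = b[i * k];
  }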
Index: llvm/lib/Analysis/TargetTransformInfo.cpp
===================================================================
--- llvm/lib/Analysis/TargetTransformInfo.cpp
+++ llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -333,6 +333,17 @@
   return TTIImpl->isLegalMaskedExpandLoad(DataType);
 }
 
+bool TargetTransformInfo::isLegalGatherOrScatter(Value *V) const {
+  LoadInst *LI = dyn_cast<LoadInst>(V);
+  StoreInst *SI = dyn_cast<StoreInst>(V);
+  if (!LI && !SI)
+    return false;
+  Type *Ty = LI ? LI->getType() : SI->getValueOperand()->getType();
+  MaybeAlign Align = getLoadStoreAlignment(V);
+  return (LI && isLegalMaskedGather(Ty, Align)) ||
+         (SI && isLegalMaskedScatter(Ty, Align));
+}
+
 bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
   return TTIImpl->hasDivRemOp(DataType, IsSigned);
 }
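For a scalar access the helper above reduces to the existing per-type hooks. An equivalent explicit spelling, assuming a LoadInst *L and a StoreInst *S are at hand (illustrative only, not part of the patch):

  // What isLegalGatherOrScatter() ends up asking the target underneath:
  bool LoadOK = TTI.isLegalMaskedGather(L->getType(),
                                        getLoadStoreAlignment(L));
  bool StoreOK = TTI.isLegalMaskedScatter(S->getValueOperand()->getType(),
                                          getLoadStoreAlignment(S));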
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1207,31 +1207,6 @@
            TTI.isLegalMaskedLoad(DataType, Alignment);
   }
 
-  /// Returns true if the target machine supports masked scatter operation
-  /// for the given \p DataType.
-  bool isLegalMaskedScatter(Type *DataType, MaybeAlign Alignment) {
-    return TTI.isLegalMaskedScatter(DataType, Alignment);
-  }
-
-  /// Returns true if the target machine supports masked gather operation
-  /// for the given \p DataType.
-  bool isLegalMaskedGather(Type *DataType, MaybeAlign Alignment) {
-    return TTI.isLegalMaskedGather(DataType, Alignment);
-  }
-
-  /// Returns true if the target machine can represent \p V as a masked gather
-  /// or scatter operation.
-  bool isLegalGatherOrScatter(Value *V) {
-    bool LI = isa<LoadInst>(V);
-    bool SI = isa<StoreInst>(V);
-    if (!LI && !SI)
-      return false;
-    auto *Ty = getMemInstValueType(V);
-    MaybeAlign Align = getLoadStoreAlignment(V);
-    return (LI && isLegalMaskedGather(Ty, Align)) ||
-           (SI && isLegalMaskedScatter(Ty, Align));
-  }
-
   /// Returns true if \p I is an instruction that will be scalarized with
   /// predication. Such instructions include conditional stores and
   /// instructions that may divide by zero.
@@ -4618,10 +4593,10 @@
       return WideningDecision == CM_Scalarize;
     }
     const MaybeAlign Alignment = getLoadStoreAlignment(I);
-    return isa<LoadInst>(I) ? !(isLegalMaskedLoad(Ty, Ptr, Alignment) ||
-                                isLegalMaskedGather(Ty, Alignment))
-                            : !(isLegalMaskedStore(Ty, Ptr, Alignment) ||
-                                isLegalMaskedScatter(Ty, Alignment));
+    bool LegalGather = TTI.isLegalGatherOrScatter(I);
+    return !(LegalGather || (isa<LoadInst>(I)
+                                 ? isLegalMaskedLoad(Ty, Ptr, Alignment)
+                                 : isLegalMaskedStore(Ty, Ptr, Alignment)));
   }
   case Instruction::UDiv:
   case Instruction::SDiv:
@@ -5169,7 +5144,7 @@
     // optimization to non-pointer types.
     //
     if (T->isPointerTy() && !isConsecutiveLoadOrStore(&I) &&
-        !isAccessInterleaved(&I) && !isLegalGatherOrScatter(&I))
+        !isAccessInterleaved(&I) && !TTI.isLegalGatherOrScatter(&I))
       continue;
 
     MinWidth = std::min(MinWidth,
@@ -6058,7 +6033,7 @@
     }
 
     unsigned GatherScatterCost =
-        isLegalGatherOrScatter(&I)
+        TTI.isLegalGatherOrScatter(&I)
            ? getGatherScatterCost(&I, VF) * NumAccesses
            : std::numeric_limits<unsigned>::max();
 
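The ARM MVE test below is the inner loop of a matrix multiply. Reconstructed from its CHECK lines, the scalar source is roughly the following (variable names are illustrative): the A access is consecutive and stays a widened vector load, while the B access strides by M and is now emitted as a masked gather instead of requiring an "M == 1" stride check.

  // Approximate scalar form of the vectorized inner loop in mve-mat-mul.ll.
  int dot_row_col(const int *A, const int *B, int RowOffset, int j, int L,
                  int M) {
    int Sum = 0;
    for (int i = 0; i < L; ++i)
      Sum += A[RowOffset + i] * B[i * M + j];
    return Sum;
  }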
Index: llvm/test/Transforms/LoopVectorize/ARM/mve-mat-mul.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/ARM/mve-mat-mul.ll
+++ llvm/test/Transforms/LoopVectorize/ARM/mve-mat-mul.ll
@@ -27,36 +27,36 @@
 ; CHECK:       for.cond8.preheader.us.us:
 ; CHECK-NEXT:    [[J_051_US_US:%.*]] = phi i32 [ [[INC21_US_US:%.*]], [[FOR_COND8_FOR_COND_CLEANUP10_CRIT_EDGE_US_US:%.*]] ], [ 0, [[FOR_COND8_PREHEADER_US_US_PREHEADER]] ]
 ; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[L]], 4
-; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK:       vector.scevcheck:
-; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[M]], 1
-; CHECK-NEXT:    [[TMP0:%.*]] = or i1 false, [[IDENT_CHECK]]
-; CHECK-NEXT:    br i1 [[TMP0]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
 ; CHECK:       vector.ph:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[L]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[L]], [[N_MOD_VF]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[M]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> undef, i32 [[J_051_US_US]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> undef, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> undef, i32 [[INDEX]], i32 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], [[MUL_US]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4
-; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP1]], [[M]]
-; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[J_051_US_US]]
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i32 [[TMP7]]
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
-; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]]
-; CHECK-NEXT:    [[TMP12]] = add nsw <4 x i32> [[TMP11]], [[VEC_PHI]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 2
+; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 3
+; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[TMP0]], [[MUL_US]]
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP4]]
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
+; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = mul <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[TMP8]], [[BROADCAST_SPLAT2]]
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], <4 x i32> [[TMP9]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP10]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+; CHECK-NEXT:    [[TMP11:%.*]] = mul nsw <4 x i32> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP12]] = add <4 x i32> [[TMP11]], [[VEC_PHI]]
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
 ; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
 ; CHECK:       middle.block:
@@ -64,8 +64,8 @@
 ; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[L]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND8_FOR_COND_CLEANUP10_CRIT_EDGE_US_US]], label [[SCALAR_PH]]
 ; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_COND8_PREHEADER_US_US]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_COND8_PREHEADER_US_US]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_COND8_PREHEADER_US_US]] ]
+; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_COND8_PREHEADER_US_US]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
 ; CHECK-NEXT:    br label [[FOR_BODY11_US_US:%.*]]
 ; CHECK:       for.cond8.for.cond.cleanup10_crit_edge.us.us:
 ; CHECK-NEXT:    [[ADD16_US_US_LCSSA:%.*]] = phi i32 [ [[ADD16_US_US:%.*]], [[FOR_BODY11_US_US]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
-; CHECK-LABEL: @scev4stride1 +; CHECK-LABEL: @scev4stride1_16 ; CHECK-NOT: vector.scevcheck ; CHECK-NOT: vector.body: ; CHECK-LABEL: for.body: -; AUTOVF-LABEL: @scev4stride1 +; AUTOVF-LABEL: @scev4stride1_16 ; AUTOVF-NOT: vector.scevcheck ; AUTOVF-NOT: vector.body: ; AUTOVF-LABEL: for.body: -define void @scev4stride1(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %k) #2 { +define void @scev4stride1_16(i16* noalias nocapture %a, i16* noalias nocapture readonly %b, i32 %k) #2 { +for.body.preheader: + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] + %mul = mul nsw i32 %i.07, %k + %arrayidx = getelementptr inbounds i16, i16* %b, i32 %mul + %0 = load i16, i16* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i16, i16* %a, i32 %i.07 + store i16 %0, i16* %arrayidx1, align 4 + %inc = add nuw nsw i32 %i.07, 1 + %exitcond = icmp eq i32 %inc, 256 + br i1 %exitcond, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + ret void +} + +; We can vectorize this one because we can instead use gather loads without needing runtime checks. +; These checks make sure that the scalar remainder loop will not be called. +; CHECK-LABEL: @scev4stride1_32 +; CHECK-NOT: vector.scevcheck +; CHECK: br i1 false, label %scalar.ph, label %vector.ph +; CHECK: %cmp.n = icmp eq i32 256, 256 +; CHECK: br i1 %cmp.n, label %for.end.loopexit, label %scalar.ph +; AUTOVF-LABEL: @scev4stride1_32 +; AUTOVF-NOT: vector.scevcheck +; AUTOVF: br i1 false, label %scalar.ph, label %vector.ph +; AUTOVF: %cmp.n = icmp eq i32 256, 256 +; AUTOVF: br i1 %cmp.n, label %for.end.loopexit, label %scalar.ph +define void @scev4stride1_32(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i32 %k) #2 { for.body.preheader: br label %for.body Index: llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp =================================================================== --- llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp +++ llvm/unittests/Transforms/Vectorize/VPlanSlpTest.cpp @@ -43,7 +43,7 @@ AARes.reset(new AAResults(TLI)); AARes->addAAResult(*BasicAA); PSE.reset(new PredicatedScalarEvolution(*SE, *L)); - LAI.reset(new LoopAccessInfo(L, &*SE, &TLI, &*AARes, &*DT, &*LI)); + LAI.reset(new LoopAccessInfo(L, &*SE, &TLI, nullptr, &*AARes, &*DT, &*LI)); IAI.reset(new InterleavedAccessInfo(*PSE, L, &*DT, &*LI, &*LAI)); IAI->analyzeInterleaving(false); return {Plan, *IAI};