diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -258,6 +258,10 @@ SmallVector getInstructionsForAccess(Value *Ptr, bool isWrite) const; + const SmallVector &getUnsafeDependences() const { + return UnsafeDependences; + } + private: /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and /// applies dynamic knowledge to simplify SCEV expressions and convert them @@ -304,6 +308,10 @@ /// RecordDependences is true. SmallVector Dependences; + /// Unsafe memory dependences collected during the analysis. + /// Used for OptRemark generation. + SmallVector UnsafeDependences; + /// Check whether there is a plausible dependence between the two /// accesses. /// @@ -524,6 +532,14 @@ /// PSE must be emitted in order for the results of this analysis to be valid. class LoopAccessInfo { public: + /// Reasons why memory accesses cannot be vectorized (used for OptRemarks) + enum class FailureReason { + UnsafeDataDependence, + UnsafeDataDependenceTriedRT, + UnknownArrayBounds, + Unknown + }; + LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI); @@ -531,6 +547,10 @@ /// no memory dependence cycles. bool canVectorizeMemory() const { return CanVecMem; } + /// Return reason describing why memory access cannot be vectorized. + /// Used for OptRemark generation. + FailureReason getFailureReason() const { return FailReason; } + /// Return true if there is a convergent operation in the loop. There may /// still be reported runtime pointer checks that would be required, but it is /// not legal to insert them. @@ -589,6 +609,10 @@ return HasDependenceInvolvingLoopInvariantAddress; } + const SmallPtrSet &getUncomputablePtrs() const { + return UncomputablePtrs; + } + /// Used to add runtime SCEV checks. 
Simplifies SCEV expressions and converts /// them to a more usable form. All SCEV expressions during the analysis /// should be re-written (and therefore simplified) according to PSE. @@ -653,6 +677,13 @@ /// Set of symbolic strides values. SmallPtrSet StrideSet; + + /// Reason why memory accesses cannot be vectorized (used for OptRemarks) + FailureReason FailReason; + + /// Set of uncomputable pointers. + /// Used when emitting OptRemarks + SmallPtrSet UncomputablePtrs; }; Value *stripIntegerCast(Value *V); diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -402,6 +402,10 @@ /// If false, good old LV code. bool canVectorizeLoopNestCFG(Loop *Lp, bool UseVPlanNativePath); + /// Elaborate on the summary report from LoopAccessAnalysis + /// with more remarks based on the failure reasons. + void elaborateMemoryReport(); + /// Set up outer loop inductions by checking Phis in outer loop header for /// supported inductions (int inductions). Return false if any of these Phis /// is not a supported induction or if we fail to find an induction. @@ -463,6 +467,9 @@ return LAI ? &LAI->getSymbolicStrides() : nullptr; } + OptimizationRemarkAnalysis + createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const; + /// The loop that we evaluate. Loop *TheLoop; diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -584,6 +584,11 @@ MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; } + /// Set of uncomputable pointers. + // + // Used when emitting no_vec_unknown_array_bounds insight. 
+ SmallPtrSet UncomputablePtrs; + private: typedef SetVector PtrAccessSet; @@ -768,6 +773,7 @@ for (auto &Access : AccessInfos) { if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop, RunningDepId, ASId, ShouldCheckWrap, false)) { + UncomputablePtrs.insert(Access.getPointer()); LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Access.getPointer() << '\n'); Retries.push_back(Access); @@ -1711,6 +1717,15 @@ isDependent(*A.first, A.second, *B.first, B.second, Strides); mergeInStatus(Dependence::isSafeForVectorization(Type)); + // Record only pairs whose own dependence is not known to be safe; + // the accumulated status stays unsafe after the first such pair. + if (Dependence::isSafeForVectorization(Type) != + VectorizationSafetyStatus::Safe) { + // TODO: Add minDistance, actual distance, minIter and type size + // for unsafe dependences to generate better insight + UnsafeDependences.push_back(Dependence(A.second, B.second, Type)); + } + // Gather dependences unless we accumulated MaxDependences // dependences. In that case return as soon as we find the first // unsafe dependence. 
This puts a limit on this quadratic @@ -2026,6 +2041,8 @@ LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " << "the array bounds.\n"); CanVecMem = false; + FailReason = FailureReason::UnknownArrayBounds; + UncomputablePtrs = std::move(Accesses.UncomputablePtrs); return; } @@ -2058,6 +2075,7 @@ << "cannot check memory dependencies at runtime"; LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); CanVecMem = false; + FailReason = FailureReason::UnsafeDataDependenceTriedRT; return; } @@ -2086,6 +2104,7 @@ "to attempt to isolate the offending operations into a separate " "loop"; LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); + FailReason = FailureReason::UnsafeDataDependence; } } @@ -2195,8 +2214,8 @@ PtrRtChecking(std::make_unique(SE)), DepChecker(std::make_unique(*PSE, L)), TheLoop(L), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false), - HasConvergentOp(false), - HasDependenceInvolvingLoopInvariantAddress(false) { + HasConvergentOp(false), HasDependenceInvolvingLoopInvariantAddress(false), + FailReason(FailureReason::Unknown) { if (canAnalyzeLoop()) analyzeLoop(AA, LI, TLI, DT); } diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -77,6 +77,34 @@ namespace llvm { +/// Create an analysis remark that explains why vectorization failed +/// +/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p +/// RemarkName is the identifier for the remark. If \p I is passed it is an +/// instruction that prevents vectorization. Otherwise \p TheLoop is used for +/// the location of the remark. \return the remark object that can be +/// streamed to. 
+static OptimizationRemarkAnalysis +createMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop, + Instruction *I = nullptr) { + Value *CodeRegion = TheLoop->getHeader(); + DebugLoc StartLoc = TheLoop->getLocRange().getStart(); + + if (I) { + CodeRegion = I->getParent(); + // If there is no debug location attached to the instruction, or if the + // location is invalid, revert back to using the loop's. + if (DebugLoc DL = I->getDebugLoc()) + if (DL->getLine() != 0) + StartLoc = DL; + } + + auto LocRange = DiagnosticLocation(StartLoc); + OptimizationRemarkAnalysis R(PassName, RemarkName, LocRange, CodeRegion); + R << "loop not vectorized: "; + return R; +} + bool LoopVectorizeHints::Hint::validate(unsigned Val) { switch (Kind) { case HK_WIDTH: @@ -419,6 +447,18 @@ return false; } +/// Create an analysis remark that explains why vectorization failed +/// +/// \p RemarkName is the identifier for the remark. If \p I is passed it is +/// an instruction that prevents vectorization. Otherwise the loop is used +/// for the location of the remark. \return the remark object that can be +/// streamed to. +OptimizationRemarkAnalysis LoopVectorizationLegality::createMissedAnalysis( + StringRef RemarkName, Instruction *I /*= nullptr*/) const { + return ::createMissedAnalysis(Hints->vectorizeAnalysisPassName(), RemarkName, + TheLoop, I); +} + int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const { const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); @@ -876,6 +916,95 @@ return true; } +static DebugLoc getDebugLocFromInstruction(Instruction *I) { + DebugLoc Loc; + if (auto *D = dyn_cast(I)) { + Loc = D->getDebugLoc(); + + if (auto *DD = dyn_cast(isa(D) + ? cast(D)->getRawDest() + : getPointerOperand(D))) + Loc = DD->getDebugLoc(); + } + return Loc; +} + +/// Add memory access related remarks for TheLoop. 
+void LoopVectorizationLegality::elaborateMemoryReport() { + switch (LAI->getFailureReason()) { + case LoopAccessInfo::FailureReason::UnsafeDataDependence: { + const auto &UnsafeDependences = LAI->getDepChecker().getUnsafeDependences(); + unsigned NumUnsafeDeps = UnsafeDependences.size(); + assert(NumUnsafeDeps > 0 && "expected unsafe dependencies but found none"); + + // Emit detailed remarks for each unsafe dependence + for (const auto &Dep : UnsafeDependences) { + switch (Dep.Type) { + case MemoryDepChecker::Dependence::NoDep: + case MemoryDepChecker::Dependence::Forward: + case MemoryDepChecker::Dependence::BackwardVectorizable: + // Don't emit a remark for dependences that don't block vectorization. + continue; + default: + break; + } + + DebugLoc SourceLoc = getDebugLocFromInstruction(Dep.getSource(*LAI)); + DebugLoc DestinationLoc = + getDebugLocFromInstruction(Dep.getDestination(*LAI)); + + OptimizationRemarkAnalysis R(Hints->vectorizeAnalysisPassName(), + "UnsafeDep", DestinationLoc, + TheLoop->getHeader()); + R << "loop not vectorized: "; + std::string LocText = " Memory location is the same as accessed at line "; + if (SourceLoc) { + LocText += std::to_string(SourceLoc.getLine()) + ":" + + std::to_string(SourceLoc.getCol()); + } else { + LocText += ":"; + } + switch (Dep.Type) { + case MemoryDepChecker::Dependence::NoDep: + case MemoryDepChecker::Dependence::Forward: + case MemoryDepChecker::Dependence::BackwardVectorizable: + llvm_unreachable("Unexpected dependency"); + case MemoryDepChecker::Dependence::Backward: + ORE->emit(R << "Backward loop carried data dependence." + LocText); + break; + case MemoryDepChecker::Dependence::ForwardButPreventsForwarding: + ORE->emit(R << "Forward loop carried data dependence that prevents " + "store-to-load forwarding." 
+ + LocText); + break; + case MemoryDepChecker::Dependence:: + BackwardVectorizableButPreventsForwarding: + ORE->emit(R << "Backward loop carried data dependence that prevents " + "store-to-load forwarding." + + LocText); + break; + case MemoryDepChecker::Dependence::Unknown: + ORE->emit(R << "Unknown data dependence." + LocText); + break; + } + } + break; + } + case LoopAccessInfo::FailureReason::UnknownArrayBounds: { + // add detailed remarks at locations of pointers where bound cannot + // be computed + for (Value *Ptr : LAI->getUncomputablePtrs()) + if (auto *I = dyn_cast(Ptr)) + ORE->emit(createMissedAnalysis("UnknownArrayBounds", I) + << "Unknown array bounds"); + break; + } + case LoopAccessInfo::FailureReason::Unknown: + case LoopAccessInfo::FailureReason::UnsafeDataDependenceTriedRT: + break; + } +} + bool LoopVectorizationLegality::canVectorizeMemory() { LAI = &(*GetLAA)(*TheLoop); const OptimizationRemarkAnalysis *LAR = LAI->getReport(); @@ -886,8 +1015,10 @@ }); } - if (!LAI->canVectorizeMemory()) + if (!LAI->canVectorizeMemory()) { + elaborateMemoryReport(); return false; + } if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { reportVectorizationFailure("Stores to a uniform address", diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll --- a/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll @@ -82,6 +82,15 @@ ; YAML-NEXT: - String: 'loop not vectorized: ' ; YAML-NEXT: - String: cannot identify array bounds ; YAML-NEXT: ... 
+; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: '' +; YAML-NEXT: Name: UnknownArrayBounds +; YAML-NEXT: DebugLoc: { File: source.cpp, Line: 19, Column: 5 } +; YAML-NEXT: Function: _Z17test_array_boundsPiS_i +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: Unknown array bounds +; YAML-NEXT: ... ; YAML-NEXT: --- !Missed ; YAML-NEXT: Pass: loop-vectorize ; YAML-NEXT: Name: MissedDetails diff --git a/llvm/test/Transforms/LoopVectorize/loopvectorize-opt-remarks.ll b/llvm/test/Transforms/LoopVectorize/loopvectorize-opt-remarks.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/loopvectorize-opt-remarks.ll @@ -0,0 +1,746 @@ +; RUN: opt -enable-new-pm=0 -loop-vectorize -analyze -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + + +; void test_unknown_bounds(int* A, int* B, int n) { +; for(int i = 0; i < n ; ++i) +; A[i] = A[B[i]] + 1; +; } + +; CHECK: remark: source.c:3:5: loop not vectorized: cannot identify array bounds +; CHECK: remark: source.c:4:16: loop not vectorized: Unknown array bounds + +define dso_local void @test_unknown_bounds(i32* nocapture %A, i32* nocapture readonly %B, i32 %n) local_unnamed_addr #0 !dbg !13 { +entry: + call void @llvm.dbg.value(metadata i32* %A, metadata !19, metadata !DIExpression()), !dbg !24 + call void @llvm.dbg.value(metadata i32* %B, metadata !20, metadata !DIExpression()), !dbg !24 + call void @llvm.dbg.value(metadata i32 %n, metadata !21, metadata !DIExpression()), !dbg !24 + call void @llvm.dbg.value(metadata i32 0, metadata !22, metadata !DIExpression()), !dbg !25 + %cmp10 = icmp sgt i32 %n, 0, !dbg !26 + br i1 %cmp10, label %for.body.preheader, label %for.cond.cleanup, !dbg !28 + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64, !dbg !26 + br label %for.body, !dbg !28 + +for.cond.cleanup: ; preds = %for.body, %entry + ret void, !dbg !29 + 
+for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !22, metadata !DIExpression()), !dbg !25 + %arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !30 + %0 = load i32, i32* %arrayidx, align 4, !dbg !30, !tbaa !31 + %idxprom1 = sext i32 %0 to i64, !dbg !35 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35 + %1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !31 + %add = add nsw i32 %1, 1, !dbg !36 + %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !37 + store i32 %add, i32* %arrayidx4, align 4, !dbg !38, !tbaa !31 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !39 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !22, metadata !DIExpression()), !dbg !25 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !26 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !28, !llvm.loop !40 +} + +; // Dependence::NoDep +; // Loop containing only reads does not hinder vectorization +; void test_nodep(int n, int* A, int* B, int* C) { +; for(int i = 1; i < n ; ++i) { +; B[i] = A[i-1]; +; C[i] = A[i+2]; +; } +; } + +; CHECK: remark: source.c:15:3: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized + +define dso_local void @test_nodep(i32 %n, i32* nocapture readonly %A, i32* nocapture %B, i32* nocapture %C) local_unnamed_addr #0 !dbg !43 { +entry: + call void @llvm.dbg.value(metadata i32 %n, metadata !47, metadata !DIExpression()), !dbg !53 + call void @llvm.dbg.value(metadata i32* %A, metadata !48, metadata !DIExpression()), !dbg !53 + call void @llvm.dbg.value(metadata i32* %B, metadata !49, metadata !DIExpression()), !dbg !53 + call void @llvm.dbg.value(metadata i32* %C, metadata !50, metadata !DIExpression()), !dbg !53 + call void 
@llvm.dbg.value(metadata i32 1, metadata !51, metadata !DIExpression()), !dbg !54 + %cmp14 = icmp sgt i32 %n, 1, !dbg !55 + br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup, !dbg !57 + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64, !dbg !55 + %0 = add nsw i64 %wide.trip.count, -1, !dbg !57 + %min.iters.check = icmp ult i64 %0, 8, !dbg !57 + br i1 %min.iters.check, label %for.body.preheader40, label %vector.memcheck, !dbg !57 + +vector.memcheck: ; preds = %for.body.preheader + %scevgep = getelementptr i32, i32* %B, i64 1, !dbg !57 + %scevgep21 = getelementptr i32, i32* %B, i64 %wide.trip.count, !dbg !57 + %scevgep23 = getelementptr i32, i32* %C, i64 1, !dbg !57 + %scevgep25 = getelementptr i32, i32* %C, i64 %wide.trip.count, !dbg !57 + %1 = add nuw nsw i64 %wide.trip.count, 2, !dbg !57 + %scevgep28 = getelementptr i32, i32* %A, i64 %1, !dbg !57 + %bound0 = icmp ult i32* %scevgep, %scevgep25, !dbg !57 + %bound1 = icmp ult i32* %scevgep23, %scevgep21, !dbg !57 + %found.conflict = and i1 %bound0, %bound1, !dbg !57 + %bound030 = icmp ult i32* %scevgep, %scevgep28, !dbg !57 + %bound131 = icmp ugt i32* %scevgep21, %A, !dbg !57 + %found.conflict32 = and i1 %bound030, %bound131, !dbg !57 + %conflict.rdx = or i1 %found.conflict, %found.conflict32, !dbg !57 + %bound033 = icmp ult i32* %scevgep23, %scevgep28, !dbg !57 + %bound134 = icmp ugt i32* %scevgep25, %A, !dbg !57 + %found.conflict35 = and i1 %bound033, %bound134, !dbg !57 + %conflict.rdx36 = or i1 %conflict.rdx, %found.conflict35, !dbg !57 + br i1 %conflict.rdx36, label %for.body.preheader40, label %vector.ph, !dbg !57 + +vector.ph: ; preds = %vector.memcheck + %n.vec = and i64 %0, -8, !dbg !57 + %ind.end = or i64 %n.vec, 1, !dbg !57 + br label %vector.body, !dbg !57 + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %offset.idx = or i64 %index, 1 + %2 = getelementptr inbounds i32, i32* %A, i64 %index, 
!dbg !58 + %3 = bitcast i32* %2 to <4 x i32>*, !dbg !58 + %wide.load = load <4 x i32>, <4 x i32>* %3, align 4, !dbg !58, !tbaa !31, !alias.scope !60 + %4 = getelementptr inbounds i32, i32* %2, i64 4, !dbg !58 + %5 = bitcast i32* %4 to <4 x i32>*, !dbg !58 + %wide.load37 = load <4 x i32>, <4 x i32>* %5, align 4, !dbg !58, !tbaa !31, !alias.scope !60 + %6 = getelementptr inbounds i32, i32* %B, i64 %offset.idx, !dbg !63 + %7 = bitcast i32* %6 to <4 x i32>*, !dbg !64 + store <4 x i32> %wide.load, <4 x i32>* %7, align 4, !dbg !64, !tbaa !31, !alias.scope !65, !noalias !67 + %8 = getelementptr inbounds i32, i32* %6, i64 4, !dbg !64 + %9 = bitcast i32* %8 to <4 x i32>*, !dbg !64 + store <4 x i32> %wide.load37, <4 x i32>* %9, align 4, !dbg !64, !tbaa !31, !alias.scope !65, !noalias !67 + %10 = or i64 %index, 3, !dbg !69 + %11 = getelementptr inbounds i32, i32* %A, i64 %10, !dbg !70 + %12 = bitcast i32* %11 to <4 x i32>*, !dbg !70 + %wide.load38 = load <4 x i32>, <4 x i32>* %12, align 4, !dbg !70, !tbaa !31, !alias.scope !60 + %13 = getelementptr inbounds i32, i32* %11, i64 4, !dbg !70 + %14 = bitcast i32* %13 to <4 x i32>*, !dbg !70 + %wide.load39 = load <4 x i32>, <4 x i32>* %14, align 4, !dbg !70, !tbaa !31, !alias.scope !60 + %15 = getelementptr inbounds i32, i32* %C, i64 %offset.idx, !dbg !71 + %16 = bitcast i32* %15 to <4 x i32>*, !dbg !72 + store <4 x i32> %wide.load38, <4 x i32>* %16, align 4, !dbg !72, !tbaa !31, !alias.scope !73, !noalias !60 + %17 = getelementptr inbounds i32, i32* %15, i64 4, !dbg !72 + %18 = bitcast i32* %17 to <4 x i32>*, !dbg !72 + store <4 x i32> %wide.load39, <4 x i32>* %18, align 4, !dbg !72, !tbaa !31, !alias.scope !73, !noalias !60 + %index.next = add nuw i64 %index, 8 + %19 = icmp eq i64 %index.next, %n.vec + br i1 %19, label %middle.block, label %vector.body, !llvm.loop !74 + +middle.block: ; preds = %vector.body + %cmp.n = icmp eq i64 %0, %n.vec, !dbg !57 + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader40, !dbg !57 + 
+for.body.preheader40: ; preds = %vector.memcheck, %for.body.preheader, %middle.block + %indvars.iv.ph = phi i64 [ 1, %vector.memcheck ], [ 1, %for.body.preheader ], [ %ind.end, %middle.block ] + br label %for.body, !dbg !57 + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret void, !dbg !77 + +for.body: ; preds = %for.body.preheader40, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader40 ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !51, metadata !DIExpression()), !dbg !54 + %20 = add nsw i64 %indvars.iv, -1, !dbg !78 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %20, !dbg !58 + %21 = load i32, i32* %arrayidx, align 4, !dbg !58, !tbaa !31 + %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !63 + store i32 %21, i32* %arrayidx2, align 4, !dbg !64, !tbaa !31 + %22 = add nuw nsw i64 %indvars.iv, 2, !dbg !69 + %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %22, !dbg !70 + %23 = load i32, i32* %arrayidx4, align 4, !dbg !70, !tbaa !31 + %arrayidx6 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv, !dbg !71 + store i32 %23, i32* %arrayidx6, align 4, !dbg !72, !tbaa !31 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !79 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !51, metadata !DIExpression()), !dbg !54 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !55 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !57, !llvm.loop !80 +} + + +; // b) Dependence::Forward +; // Loop gets vectorized since it contains only a forward +; // dependency between A[i-2] and A[i] +; void test_forward(int n, int* A, int* B) { +; for(int i=1; i < n; ++i) { +; A[i] = 10; +; B[i] = A[i-2]; +; } +; } + +; CHECK: remark: source.c:25:3: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized +define dso_local void @test_forward(i32 %n, 
i32* nocapture %A, i32* nocapture %B) local_unnamed_addr #0 !dbg !81 { +entry: + call void @llvm.dbg.value(metadata i32 %n, metadata !85, metadata !DIExpression()), !dbg !90 + call void @llvm.dbg.value(metadata i32* %A, metadata !86, metadata !DIExpression()), !dbg !90 + call void @llvm.dbg.value(metadata i32* %B, metadata !87, metadata !DIExpression()), !dbg !90 + call void @llvm.dbg.value(metadata i32 1, metadata !88, metadata !DIExpression()), !dbg !91 + %cmp11 = icmp sgt i32 %n, 1, !dbg !92 + br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup, !dbg !94 + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64, !dbg !92 + %0 = add nsw i64 %wide.trip.count, -1, !dbg !94 + %min.iters.check = icmp ult i64 %0, 4, !dbg !94 + br i1 %min.iters.check, label %for.body.preheader22, label %vector.memcheck, !dbg !94 + +vector.memcheck: ; preds = %for.body.preheader + %scevgep = getelementptr i32, i32* %A, i64 -1, !dbg !94 + %scevgep16 = getelementptr i32, i32* %A, i64 %wide.trip.count, !dbg !94 + %scevgep18 = getelementptr i32, i32* %B, i64 1, !dbg !94 + %scevgep20 = getelementptr i32, i32* %B, i64 %wide.trip.count, !dbg !94 + %bound0 = icmp ult i32* %scevgep, %scevgep20, !dbg !94 + %bound1 = icmp ult i32* %scevgep18, %scevgep16, !dbg !94 + %found.conflict = and i1 %bound0, %bound1, !dbg !94 + br i1 %found.conflict, label %for.body.preheader22, label %vector.ph, !dbg !94 + +vector.ph: ; preds = %vector.memcheck + %n.vec = and i64 %0, -4, !dbg !94 + %ind.end = or i64 %n.vec, 1, !dbg !94 + br label %vector.body, !dbg !94 + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %offset.idx = or i64 %index, 1 + %1 = getelementptr inbounds i32, i32* %A, i64 %offset.idx, !dbg !95 + %2 = bitcast i32* %1 to <4 x i32>*, !dbg !97 + store <4 x i32> , <4 x i32>* %2, align 4, !dbg !97, !tbaa !31, !alias.scope !98, !noalias !101 + %3 = add i64 %index, -1, !dbg !103 + %4 = getelementptr 
inbounds i32, i32* %A, i64 %3, !dbg !104 + %5 = bitcast i32* %4 to <4 x i32>*, !dbg !104 + %wide.load = load <4 x i32>, <4 x i32>* %5, align 4, !dbg !104, !tbaa !31, !alias.scope !98, !noalias !101 + %6 = getelementptr inbounds i32, i32* %B, i64 %offset.idx, !dbg !105 + %7 = bitcast i32* %6 to <4 x i32>*, !dbg !106 + store <4 x i32> %wide.load, <4 x i32>* %7, align 4, !dbg !106, !tbaa !31, !alias.scope !101 + %index.next = add nuw i64 %index, 4 + %8 = icmp eq i64 %index.next, %n.vec + br i1 %8, label %middle.block, label %vector.body, !llvm.loop !107 + +middle.block: ; preds = %vector.body + %cmp.n = icmp eq i64 %0, %n.vec, !dbg !94 + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader22, !dbg !94 + +for.body.preheader22: ; preds = %vector.memcheck, %for.body.preheader, %middle.block + %indvars.iv.ph = phi i64 [ 1, %vector.memcheck ], [ 1, %for.body.preheader ], [ %ind.end, %middle.block ] + br label %for.body, !dbg !94 + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret void, !dbg !109 + +for.body: ; preds = %for.body.preheader22, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader22 ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !88, metadata !DIExpression()), !dbg !91 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !95 + store i32 10, i32* %arrayidx, align 4, !dbg !97, !tbaa !31 + %9 = add nsw i64 %indvars.iv, -2, !dbg !103 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %9, !dbg !104 + %10 = load i32, i32* %arrayidx2, align 4, !dbg !104, !tbaa !31 + %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !105 + store i32 %10, i32* %arrayidx4, align 4, !dbg !106, !tbaa !31 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !110 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !88, metadata !DIExpression()), !dbg !91 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg 
!92 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !94, !llvm.loop !111 +} + +; // c) Dependence::BackwardVectorizable +; // Loop gets vectorized since it contains a backward dependency +; // between A[i] and A[i-4], but the dependency distance (4) is +; // greater than the minimum possible VF (2 in this case) +; void test_backwardVectorizable(int n, int* A, int* B) { +; for(int i=4; i < n; ++i) { +; A[i] = A[i-4] + 1; +; } +; } + +; CHECK: remark: source.c:37:3: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized + +define dso_local void @test_backwardVectorizable(i32 %n, i32* nocapture %A, i32* nocapture readnone %B) local_unnamed_addr #0 !dbg !112 { +entry: + call void @llvm.dbg.value(metadata i32 %n, metadata !114, metadata !DIExpression()), !dbg !119 + call void @llvm.dbg.value(metadata i32* %A, metadata !115, metadata !DIExpression()), !dbg !119 + call void @llvm.dbg.value(metadata i32* %B, metadata !116, metadata !DIExpression()), !dbg !119 + call void @llvm.dbg.value(metadata i32 4, metadata !117, metadata !DIExpression()), !dbg !120 + %A11 = bitcast i32* %A to <4 x i32>*, !dbg !121 + %cmp8 = icmp sgt i32 %n, 4, !dbg !121 + br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup, !dbg !123 + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64, !dbg !121 + %0 = add nsw i64 %wide.trip.count, -4, !dbg !123 + %min.iters.check = icmp ult i64 %0, 4, !dbg !123 + br i1 %min.iters.check, label %for.body.preheader12, label %vector.ph, !dbg !123 + +vector.ph: ; preds = %for.body.preheader + %n.vec = and i64 %0, -4, !dbg !123 + %ind.end = add nsw i64 %n.vec, 4, !dbg !123 + %load_initial = load <4 x i32>, <4 x i32>* %A11, align 4 + br label %vector.body, !dbg !123 + +vector.body: ; preds = %vector.body, %vector.ph + %store_forwarded = phi <4 x i32> [ %load_initial, %vector.ph ], [ %1, %vector.body ] + %index = phi i64 [ 0, %vector.ph ], [ 
%index.next, %vector.body ] + %offset.idx = add i64 %index, 4 + %1 = add nsw <4 x i32> %store_forwarded, , !dbg !124 + %2 = getelementptr inbounds i32, i32* %A, i64 %offset.idx, !dbg !126 + %3 = bitcast i32* %2 to <4 x i32>*, !dbg !127 + store <4 x i32> %1, <4 x i32>* %3, align 4, !dbg !127, !tbaa !31 + %index.next = add nuw i64 %index, 4 + %4 = icmp eq i64 %index.next, %n.vec + br i1 %4, label %middle.block, label %vector.body, !llvm.loop !128 + +middle.block: ; preds = %vector.body + %cmp.n = icmp eq i64 %0, %n.vec, !dbg !123 + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader12, !dbg !123 + +for.body.preheader12: ; preds = %for.body.preheader, %middle.block + %indvars.iv.ph = phi i64 [ 4, %for.body.preheader ], [ %ind.end, %middle.block ] + br label %for.body, !dbg !123 + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret void, !dbg !130 + +for.body: ; preds = %for.body.preheader12, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader12 ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !117, metadata !DIExpression()), !dbg !120 + %5 = add nsw i64 %indvars.iv, -4, !dbg !131 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %5, !dbg !132 + %6 = load i32, i32* %arrayidx, align 4, !dbg !132, !tbaa !31 + %add = add nsw i32 %6, 1, !dbg !124 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !126 + store i32 %add, i32* %arrayidx2, align 4, !dbg !127, !tbaa !31 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !133 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !117, metadata !DIExpression()), !dbg !120 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !121 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !123, !llvm.loop !134 +} + + +; // d) Dependence::Backward +; // Loop does not get vectorized since it contains a backward +; // dependency between A[i] and A[i+3]. 
+; void test_backward_dep(int n, double *A) { +; for (int i = 1; i <= n - 3; i += 3) { +; A[i] = A[i-1]; +; A[i+1] = A[i+3]; +; } +; } + +; CHECK: remark: source.c:48:14: loop not vectorized: Backward loop carried data dependence. Memory location is the same as accessed at line 47:5 + +define dso_local void @test_backward_dep(i32 %n, double* nocapture %A) local_unnamed_addr #0 !dbg !136 { +entry: + call void @llvm.dbg.value(metadata i32 %n, metadata !142, metadata !DIExpression()), !dbg !146 + call void @llvm.dbg.value(metadata double* %A, metadata !143, metadata !DIExpression()), !dbg !146 + call void @llvm.dbg.value(metadata i32 1, metadata !144, metadata !DIExpression()), !dbg !147 + call void @llvm.dbg.value(metadata i32 1, metadata !144, metadata !DIExpression()), !dbg !147 + %cmp.not19 = icmp slt i32 %n, 4, !dbg !148 + br i1 %cmp.not19, label %for.cond.cleanup, label %for.body.preheader, !dbg !150 + +for.body.preheader: ; preds = %entry + %sub = add nsw i32 %n, -3 + %0 = zext i32 %sub to i64, !dbg !150 + br label %for.body, !dbg !150 + +for.cond.cleanup: ; preds = %for.body, %entry + ret void, !dbg !151 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 1, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !144, metadata !DIExpression()), !dbg !147 + %1 = add nsw i64 %indvars.iv, -1, !dbg !152 + %arrayidx = getelementptr inbounds double, double* %A, i64 %1, !dbg !154 + %2 = load double, double* %arrayidx, align 8, !dbg !154, !tbaa !155 + %arrayidx3 = getelementptr inbounds double, double* %A, i64 %indvars.iv, !dbg !157 + store double %2, double* %arrayidx3, align 8, !dbg !158, !tbaa !155 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3, !dbg !159 + %arrayidx5 = getelementptr inbounds double, double* %A, i64 %indvars.iv.next, !dbg !160 + %3 = load double, double* %arrayidx5, align 8, !dbg !160, !tbaa !155 + %4 = add nuw nsw i64 %indvars.iv, 1, !dbg !161 + 
%arrayidx8 = getelementptr inbounds double, double* %A, i64 %4, !dbg !162 + store double %3, double* %arrayidx8, align 8, !dbg !163, !tbaa !155 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !144, metadata !DIExpression()), !dbg !147 + %cmp.not = icmp ugt i64 %indvars.iv.next, %0, !dbg !148 + br i1 %cmp.not, label %for.cond.cleanup, label %for.body, !dbg !150, !llvm.loop !164 +} + +; // e) Dependence::ForwardButPreventsForwarding +; // Loop does not get vectorized despite only having a forward +; // dependency between A[i] and A[i-3]. +; // This is because the store-to-load forwarding distance (here 3) +; // needs to be a multiple of the vectorization factor; otherwise the +; // store (A[5:6] in i=5) and load (A[4:5],A[6:7] in i=7,9) are unaligned. +; void test_forwardPreventsForwarding_dep(int* A, int* B, int n) { +; for(int i=3; i < n; ++i) { +; A[i] = 10; +; B[i] = A[i-3]; +; } +; } + +; CHECK: remark: source.c:61:12: loop not vectorized: Forward loop carried data dependence that prevents store-to-load forwarding.
Memory location is the same as accessed at line 60:5 + +define dso_local void @test_forwardPreventsForwarding_dep(i32* nocapture %A, i32* nocapture %B, i32 %n) local_unnamed_addr #0 !dbg !166 { +entry: + call void @llvm.dbg.value(metadata i32* %A, metadata !168, metadata !DIExpression()), !dbg !173 + call void @llvm.dbg.value(metadata i32* %B, metadata !169, metadata !DIExpression()), !dbg !173 + call void @llvm.dbg.value(metadata i32 %n, metadata !170, metadata !DIExpression()), !dbg !173 + call void @llvm.dbg.value(metadata i32 3, metadata !171, metadata !DIExpression()), !dbg !174 + %cmp11 = icmp sgt i32 %n, 3, !dbg !175 + br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup, !dbg !177 + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64, !dbg !175 + br label %for.body, !dbg !177 + +for.cond.cleanup: ; preds = %for.body, %entry + ret void, !dbg !178 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 3, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !171, metadata !DIExpression()), !dbg !174 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !179 + store i32 10, i32* %arrayidx, align 4, !dbg !181, !tbaa !31 + %0 = add nsw i64 %indvars.iv, -3, !dbg !182 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !183 + %1 = load i32, i32* %arrayidx2, align 4, !dbg !183, !tbaa !31 + %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !184 + store i32 %1, i32* %arrayidx4, align 4, !dbg !185, !tbaa !31 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !186 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !171, metadata !DIExpression()), !dbg !174 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !175 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !177, !llvm.loop !187 +} + +; // f) 
Dependence::BackwardVectorizableButPreventsForwarding +; // Loop does not get vectorized despite having a backward +; // but vectorizable dependency between A[i] and A[i-15]. +; // +; // This is because the store-to-load forwarding distance (here 15) +; // needs to be a multiple of the vectorization factor; otherwise the +; // store (A[16:17] in i=16) and load (A[15:16], A[17:18] in i=30,32) are unaligned. +; void test_backwardVectorizableButPreventsForwarding(int* A, int n) { +; for(int i=15; i < n; ++i) { +; A[i] = A[i-2] + A[i-15]; +; } +; } + +; CHECK: remark: source.c:74:5: loop not vectorized: Backward loop carried data dependence that prevents store-to-load forwarding. Memory location is the same as accessed at line 74:21 + +define dso_local void @test_backwardVectorizableButPreventsForwarding(i32* nocapture %A, i32 %n) local_unnamed_addr #0 !dbg !189 { +entry: + call void @llvm.dbg.value(metadata i32* %A, metadata !193, metadata !DIExpression()), !dbg !197 + call void @llvm.dbg.value(metadata i32 %n, metadata !194, metadata !DIExpression()), !dbg !197 + call void @llvm.dbg.value(metadata i32 15, metadata !195, metadata !DIExpression()), !dbg !198 + %cmp13 = icmp sgt i32 %n, 15, !dbg !199 + br i1 %cmp13, label %for.body.preheader, label %for.cond.cleanup, !dbg !201 + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64, !dbg !199 + br label %for.body, !dbg !201 + +for.cond.cleanup: ; preds = %for.body, %entry + ret void, !dbg !202 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 15, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !195, metadata !DIExpression()), !dbg !198 + %0 = add nsw i64 %indvars.iv, -2, !dbg !203 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !205 + %1 = load i32, i32* %arrayidx, align 4, !dbg !205, !tbaa !31 + %2 = add nsw i64 %indvars.iv, -15, !dbg !206 + %arrayidx3 = getelementptr inbounds i32, i32* %A, i64
%2, !dbg !207 + %3 = load i32, i32* %arrayidx3, align 4, !dbg !207, !tbaa !31 + %add = add nsw i32 %3, %1, !dbg !208 + %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !209 + store i32 %add, i32* %arrayidx5, align 4, !dbg !210, !tbaa !31 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !211 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !195, metadata !DIExpression()), !dbg !198 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !199 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !201, !llvm.loop !212 +} + +; // g) Dependence::Unknown +; // The two stores to A advance with different strides (4 vs. 1). +; void test_unknown_dep(int* A, int n) { +; for(int i=0; i < n; ++i) { +; A[(i+1)*4] = 10; +; A[i] = 100; +; } +; } + +; CHECK: remark: source.c:83:7: loop not vectorized: Unknown data dependence. Memory location is the same as accessed at line 82:7 + +define dso_local void @test_unknown_dep(i32* nocapture %A, i32 %n) local_unnamed_addr #1 !dbg !214 { +entry: + call void @llvm.dbg.value(metadata i32* %A, metadata !216, metadata !DIExpression()), !dbg !220 + call void @llvm.dbg.value(metadata i32 %n, metadata !217, metadata !DIExpression()), !dbg !220 + call void @llvm.dbg.value(metadata i32 0, metadata !218, metadata !DIExpression()), !dbg !221 + %cmp8 = icmp sgt i32 %n, 0, !dbg !222 + br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup, !dbg !224 + +for.body.preheader: ; preds = %entry + %wide.trip.count = zext i32 %n to i64, !dbg !222 + br label %for.body, !dbg !224 + +for.cond.cleanup: ; preds = %for.body, %entry + ret void, !dbg !225 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !218, metadata !DIExpression()), !dbg !221 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !226 + %0 = shl nsw i64 %indvars.iv.next, 2, !dbg !228 +
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !229 + store i32 10, i32* %arrayidx, align 4, !dbg !230, !tbaa !31 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !231 + store i32 100, i32* %arrayidx2, align 4, !dbg !232, !tbaa !31 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !218, metadata !DIExpression()), !dbg !221 + %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count, !dbg !222 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !224, !llvm.loop !233 +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) #2 + +attributes #0 = { nofree norecurse nosync nounwind uwtable "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" } +attributes #1 = { nofree norecurse nosync nounwind uwtable writeonly "frame-pointer"="non-leaf" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" } +attributes #2 = { nofree nosync nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5, !6, !7, !8, !9, !10, !11} +!llvm.ident = !{!12} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0 (https://github.com/llvm/llvm-project.git 54f0f826c5c7d0ff16c230b259cb6aad33e18d97)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "source.c", directory: "/Users/maljaj01/llvm_work/llvm-project/SHOJICOMP_7084") +!2 = !{} +!3 = !{i32 7, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"wchar_size", i32 4} +!6 = !{i32 1, !"branch-target-enforcement", i32 0} +!7 = !{i32 1, !"sign-return-address", i32 0} +!8 = !{i32 1, !"sign-return-address-all", i32 0} +!9 = !{i32 1, 
!"sign-return-address-with-bkey", i32 0} +!10 = !{i32 7, !"uwtable", i32 1} +!11 = !{i32 7, !"frame-pointer", i32 1} +!12 = !{!"clang version 14.0.0 (https://github.com/llvm/llvm-project.git 54f0f826c5c7d0ff16c230b259cb6aad33e18d97)"} +!13 = distinct !DISubprogram(name: "test_unknown_bounds", scope: !1, file: !1, line: 2, type: !14, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !18) +!14 = !DISubroutineType(types: !15) +!15 = !{null, !16, !16, !17} +!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 64) +!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!18 = !{!19, !20, !21, !22} +!19 = !DILocalVariable(name: "A", arg: 1, scope: !13, file: !1, line: 2, type: !16) +!20 = !DILocalVariable(name: "B", arg: 2, scope: !13, file: !1, line: 2, type: !16) +!21 = !DILocalVariable(name: "n", arg: 3, scope: !13, file: !1, line: 2, type: !17) +!22 = !DILocalVariable(name: "i", scope: !23, file: !1, line: 3, type: !17) +!23 = distinct !DILexicalBlock(scope: !13, file: !1, line: 3, column: 5) +!24 = !DILocation(line: 0, scope: !13) +!25 = !DILocation(line: 0, scope: !23) +!26 = !DILocation(line: 3, column: 22, scope: !27) +!27 = distinct !DILexicalBlock(scope: !23, file: !1, line: 3, column: 5) +!28 = !DILocation(line: 3, column: 5, scope: !23) +!29 = !DILocation(line: 5, column: 1, scope: !13) +!30 = !DILocation(line: 4, column: 18, scope: !27) +!31 = !{!32, !32, i64 0} +!32 = !{!"int", !33, i64 0} +!33 = !{!"omnipotent char", !34, i64 0} +!34 = !{!"Simple C/C++ TBAA"} +!35 = !DILocation(line: 4, column: 16, scope: !27) +!36 = !DILocation(line: 4, column: 24, scope: !27) +!37 = !DILocation(line: 4, column: 9, scope: !27) +!38 = !DILocation(line: 4, column: 14, scope: !27) +!39 = !DILocation(line: 3, column: 28, scope: !27) +!40 = distinct !{!40, !28, !41, !42} +!41 = !DILocation(line: 4, column: 26, scope: !23) +!42 = !{!"llvm.loop.mustprogress"} +!43 
= distinct !DISubprogram(name: "test_nodep", scope: !1, file: !1, line: 14, type: !44, scopeLine: 14, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !46) +!44 = !DISubroutineType(types: !45) +!45 = !{null, !17, !16, !16, !16} +!46 = !{!47, !48, !49, !50, !51} +!47 = !DILocalVariable(name: "n", arg: 1, scope: !43, file: !1, line: 14, type: !17) +!48 = !DILocalVariable(name: "A", arg: 2, scope: !43, file: !1, line: 14, type: !16) +!49 = !DILocalVariable(name: "B", arg: 3, scope: !43, file: !1, line: 14, type: !16) +!50 = !DILocalVariable(name: "C", arg: 4, scope: !43, file: !1, line: 14, type: !16) +!51 = !DILocalVariable(name: "i", scope: !52, file: !1, line: 15, type: !17) +!52 = distinct !DILexicalBlock(scope: !43, file: !1, line: 15, column: 3) +!53 = !DILocation(line: 0, scope: !43) +!54 = !DILocation(line: 0, scope: !52) +!55 = !DILocation(line: 15, column: 20, scope: !56) +!56 = distinct !DILexicalBlock(scope: !52, file: !1, line: 15, column: 3) +!57 = !DILocation(line: 15, column: 3, scope: !52) +!58 = !DILocation(line: 16, column: 12, scope: !59) +!59 = distinct !DILexicalBlock(scope: !56, file: !1, line: 15, column: 31) +!60 = !{!61} +!61 = distinct !{!61, !62} +!62 = distinct !{!62, !"LVerDomain"} +!63 = !DILocation(line: 16, column: 5, scope: !59) +!64 = !DILocation(line: 16, column: 10, scope: !59) +!65 = !{!66} +!66 = distinct !{!66, !62} +!67 = !{!68, !61} +!68 = distinct !{!68, !62} +!69 = !DILocation(line: 17, column: 15, scope: !59) +!70 = !DILocation(line: 17, column: 12, scope: !59) +!71 = !DILocation(line: 17, column: 5, scope: !59) +!72 = !DILocation(line: 17, column: 10, scope: !59) +!73 = !{!68} +!74 = distinct !{!74, !57, !75, !42, !76} +!75 = !DILocation(line: 18, column: 3, scope: !52) +!76 = !{!"llvm.loop.isvectorized", i32 1} +!77 = !DILocation(line: 19, column: 1, scope: !43) +!78 = !DILocation(line: 16, column: 15, scope: !59) +!79 = !DILocation(line: 15, 
column: 26, scope: !56) +!80 = distinct !{!80, !57, !75, !42, !76} +!81 = distinct !DISubprogram(name: "test_forward", scope: !1, file: !1, line: 24, type: !82, scopeLine: 24, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !84) +!82 = !DISubroutineType(types: !83) +!83 = !{null, !17, !16, !16} +!84 = !{!85, !86, !87, !88} +!85 = !DILocalVariable(name: "n", arg: 1, scope: !81, file: !1, line: 24, type: !17) +!86 = !DILocalVariable(name: "A", arg: 2, scope: !81, file: !1, line: 24, type: !16) +!87 = !DILocalVariable(name: "B", arg: 3, scope: !81, file: !1, line: 24, type: !16) +!88 = !DILocalVariable(name: "i", scope: !89, file: !1, line: 25, type: !17) +!89 = distinct !DILexicalBlock(scope: !81, file: !1, line: 25, column: 3) +!90 = !DILocation(line: 0, scope: !81) +!91 = !DILocation(line: 0, scope: !89) +!92 = !DILocation(line: 25, column: 18, scope: !93) +!93 = distinct !DILexicalBlock(scope: !89, file: !1, line: 25, column: 3) +!94 = !DILocation(line: 25, column: 3, scope: !89) +!95 = !DILocation(line: 26, column: 5, scope: !96) +!96 = distinct !DILexicalBlock(scope: !93, file: !1, line: 25, column: 28) +!97 = !DILocation(line: 26, column: 10, scope: !96) +!98 = !{!99} +!99 = distinct !{!99, !100} +!100 = distinct !{!100, !"LVerDomain"} +!101 = !{!102} +!102 = distinct !{!102, !100} +!103 = !DILocation(line: 27, column: 15, scope: !96) +!104 = !DILocation(line: 27, column: 12, scope: !96) +!105 = !DILocation(line: 27, column: 5, scope: !96) +!106 = !DILocation(line: 27, column: 10, scope: !96) +!107 = distinct !{!107, !94, !108, !42, !76} +!108 = !DILocation(line: 28, column: 3, scope: !89) +!109 = !DILocation(line: 29, column: 1, scope: !81) +!110 = !DILocation(line: 25, column: 23, scope: !93) +!111 = distinct !{!111, !94, !108, !42, !76} +!112 = distinct !DISubprogram(name: "test_backwardVectorizable", scope: !1, file: !1, line: 36, type: !82, scopeLine: 36, flags: DIFlagPrototyped | 
DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !113) +!113 = !{!114, !115, !116, !117} +!114 = !DILocalVariable(name: "n", arg: 1, scope: !112, file: !1, line: 36, type: !17) +!115 = !DILocalVariable(name: "A", arg: 2, scope: !112, file: !1, line: 36, type: !16) +!116 = !DILocalVariable(name: "B", arg: 3, scope: !112, file: !1, line: 36, type: !16) +!117 = !DILocalVariable(name: "i", scope: !118, file: !1, line: 37, type: !17) +!118 = distinct !DILexicalBlock(scope: !112, file: !1, line: 37, column: 3) +!119 = !DILocation(line: 0, scope: !112) +!120 = !DILocation(line: 0, scope: !118) +!121 = !DILocation(line: 37, column: 18, scope: !122) +!122 = distinct !DILexicalBlock(scope: !118, file: !1, line: 37, column: 3) +!123 = !DILocation(line: 37, column: 3, scope: !118) +!124 = !DILocation(line: 38, column: 19, scope: !125) +!125 = distinct !DILexicalBlock(scope: !122, file: !1, line: 37, column: 28) +!126 = !DILocation(line: 38, column: 5, scope: !125) +!127 = !DILocation(line: 38, column: 10, scope: !125) +!128 = distinct !{!128, !123, !129, !42, !76} +!129 = !DILocation(line: 39, column: 3, scope: !118) +!130 = !DILocation(line: 40, column: 1, scope: !112) +!131 = !DILocation(line: 38, column: 15, scope: !125) +!132 = !DILocation(line: 38, column: 12, scope: !125) +!133 = !DILocation(line: 37, column: 23, scope: !122) +!134 = distinct !{!134, !123, !129, !42, !135, !76} +!135 = !{!"llvm.loop.unroll.runtime.disable"} +!136 = distinct !DISubprogram(name: "test_backward_dep", scope: !1, file: !1, line: 45, type: !137, scopeLine: 45, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !141) +!137 = !DISubroutineType(types: !138) +!138 = !{null, !17, !139} +!139 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !140, size: 64) +!140 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) +!141 = !{!142, !143, !144} +!142 = 
!DILocalVariable(name: "n", arg: 1, scope: !136, file: !1, line: 45, type: !17) +!143 = !DILocalVariable(name: "A", arg: 2, scope: !136, file: !1, line: 45, type: !139) +!144 = !DILocalVariable(name: "i", scope: !145, file: !1, line: 46, type: !17) +!145 = distinct !DILexicalBlock(scope: !136, file: !1, line: 46, column: 3) +!146 = !DILocation(line: 0, scope: !136) +!147 = !DILocation(line: 0, scope: !145) +!148 = !DILocation(line: 46, column: 21, scope: !149) +!149 = distinct !DILexicalBlock(scope: !145, file: !1, line: 46, column: 3) +!150 = !DILocation(line: 46, column: 3, scope: !145) +!151 = !DILocation(line: 50, column: 1, scope: !136) +!152 = !DILocation(line: 47, column: 15, scope: !153) +!153 = distinct !DILexicalBlock(scope: !149, file: !1, line: 46, column: 39) +!154 = !DILocation(line: 47, column: 12, scope: !153) +!155 = !{!156, !156, i64 0} +!156 = !{!"double", !33, i64 0} +!157 = !DILocation(line: 47, column: 5, scope: !153) +!158 = !DILocation(line: 47, column: 10, scope: !153) +!159 = !DILocation(line: 48, column: 17, scope: !153) +!160 = !DILocation(line: 48, column: 14, scope: !153) +!161 = !DILocation(line: 48, column: 8, scope: !153) +!162 = !DILocation(line: 48, column: 5, scope: !153) +!163 = !DILocation(line: 48, column: 12, scope: !153) +!164 = distinct !{!164, !150, !165, !42} +!165 = !DILocation(line: 49, column: 3, scope: !145) +!166 = distinct !DISubprogram(name: "test_forwardPreventsForwarding_dep", scope: !1, file: !1, line: 58, type: !14, scopeLine: 58, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !167) +!167 = !{!168, !169, !170, !171} +!168 = !DILocalVariable(name: "A", arg: 1, scope: !166, file: !1, line: 58, type: !16) +!169 = !DILocalVariable(name: "B", arg: 2, scope: !166, file: !1, line: 58, type: !16) +!170 = !DILocalVariable(name: "n", arg: 3, scope: !166, file: !1, line: 58, type: !17) +!171 = !DILocalVariable(name: "i", scope: !172, file: !1, 
line: 59, type: !17) +!172 = distinct !DILexicalBlock(scope: !166, file: !1, line: 59, column: 3) +!173 = !DILocation(line: 0, scope: !166) +!174 = !DILocation(line: 0, scope: !172) +!175 = !DILocation(line: 59, column: 18, scope: !176) +!176 = distinct !DILexicalBlock(scope: !172, file: !1, line: 59, column: 3) +!177 = !DILocation(line: 59, column: 3, scope: !172) +!178 = !DILocation(line: 63, column: 1, scope: !166) +!179 = !DILocation(line: 60, column: 5, scope: !180) +!180 = distinct !DILexicalBlock(scope: !176, file: !1, line: 59, column: 28) +!181 = !DILocation(line: 60, column: 10, scope: !180) +!182 = !DILocation(line: 61, column: 15, scope: !180) +!183 = !DILocation(line: 61, column: 12, scope: !180) +!184 = !DILocation(line: 61, column: 5, scope: !180) +!185 = !DILocation(line: 61, column: 10, scope: !180) +!186 = !DILocation(line: 59, column: 23, scope: !176) +!187 = distinct !{!187, !177, !188, !42} +!188 = !DILocation(line: 62, column: 3, scope: !172) +!189 = distinct !DISubprogram(name: "test_backwardVectorizableButPreventsForwarding", scope: !1, file: !1, line: 72, type: !190, scopeLine: 72, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !192) +!190 = !DISubroutineType(types: !191) +!191 = !{null, !16, !17} +!192 = !{!193, !194, !195} +!193 = !DILocalVariable(name: "A", arg: 1, scope: !189, file: !1, line: 72, type: !16) +!194 = !DILocalVariable(name: "n", arg: 2, scope: !189, file: !1, line: 72, type: !17) +!195 = !DILocalVariable(name: "i", scope: !196, file: !1, line: 73, type: !17) +!196 = distinct !DILexicalBlock(scope: !189, file: !1, line: 73, column: 3) +!197 = !DILocation(line: 0, scope: !189) +!198 = !DILocation(line: 0, scope: !196) +!199 = !DILocation(line: 73, column: 19, scope: !200) +!200 = distinct !DILexicalBlock(scope: !196, file: !1, line: 73, column: 3) +!201 = !DILocation(line: 73, column: 3, scope: !196) +!202 = !DILocation(line: 76, column: 1, scope: 
!189) +!203 = !DILocation(line: 74, column: 15, scope: !204) +!204 = distinct !DILexicalBlock(scope: !200, file: !1, line: 73, column: 29) +!205 = !DILocation(line: 74, column: 12, scope: !204) +!206 = !DILocation(line: 74, column: 24, scope: !204) +!207 = !DILocation(line: 74, column: 21, scope: !204) +!208 = !DILocation(line: 74, column: 19, scope: !204) +!209 = !DILocation(line: 74, column: 5, scope: !204) +!210 = !DILocation(line: 74, column: 10, scope: !204) +!211 = !DILocation(line: 73, column: 24, scope: !200) +!212 = distinct !{!212, !201, !213, !42} +!213 = !DILocation(line: 75, column: 3, scope: !196) +!214 = distinct !DISubprogram(name: "test_unknown_dep", scope: !1, file: !1, line: 80, type: !190, scopeLine: 80, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !215) +!215 = !{!216, !217, !218} +!216 = !DILocalVariable(name: "A", arg: 1, scope: !214, file: !1, line: 80, type: !16) +!217 = !DILocalVariable(name: "n", arg: 2, scope: !214, file: !1, line: 80, type: !17) +!218 = !DILocalVariable(name: "i", scope: !219, file: !1, line: 81, type: !17) +!219 = distinct !DILexicalBlock(scope: !214, file: !1, line: 81, column: 3) +!220 = !DILocation(line: 0, scope: !214) +!221 = !DILocation(line: 0, scope: !219) +!222 = !DILocation(line: 81, column: 18, scope: !223) +!223 = distinct !DILexicalBlock(scope: !219, file: !1, line: 81, column: 3) +!224 = !DILocation(line: 81, column: 3, scope: !219) +!225 = !DILocation(line: 85, column: 1, scope: !214) +!226 = !DILocation(line: 82, column: 11, scope: !227) +!227 = distinct !DILexicalBlock(scope: !223, file: !1, line: 81, column: 28) +!228 = !DILocation(line: 82, column: 14, scope: !227) +!229 = !DILocation(line: 82, column: 7, scope: !227) +!230 = !DILocation(line: 82, column: 18, scope: !227) +!231 = !DILocation(line: 83, column: 7, scope: !227) +!232 = !DILocation(line: 83, column: 12, scope: !227) +!233 = distinct !{!233, !224, !234, !42} 
+!234 = !DILocation(line: 84, column: 3, scope: !219)