Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -5489,6 +5489,22 @@ !0 = !{!"llvm.loop.vectorize.predicate.enable", i1 0} !1 = !{!"llvm.loop.vectorize.predicate.enable", i1 1} +'``llvm.loop.vectorize.ivdep.enable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata instructs the vectorizer to ignore dependencies between +memory accesses which have not been determined to be either safe or unsafe +for vectorization. This differs from ``llvm.loop.parallel_accesses``, which +considers no dependencies to be present between memory accesses belonging +to the same access group. The first operand is the string +``llvm.loop.vectorize.ivdep.enable`` and the second operand is a bit. A +value of 1 implies that the functionality of this metadata is enabled for +the loop. + +.. code-block:: llvm + + !0 = !{!"llvm.loop.vectorize.ivdep.enable", i1 1} + '``llvm.loop.vectorize.width``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: llvm/include/llvm/Analysis/LoopAccessAnalysis.h =================================================================== --- llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -201,7 +201,7 @@ /// /// Only checks sets with elements in \p CheckDeps. bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, - const ValueToValueMap &Strides); + const ValueToValueMap &Strides, bool UnknownDepHint); /// No memory dependence was encountered that would inhibit /// vectorization. @@ -516,7 +516,8 @@ class LoopAccessInfo { public: LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, - AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI); + AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, + bool UnknownDepHint = false); /// Return true we can analyze the memory accesses in the loop and there are /// no memory dependence cycles. 
@@ -608,7 +609,8 @@ private: /// Analyze the loop. void analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, - const TargetLibraryInfo *TLI, DominatorTree *DT); + const TargetLibraryInfo *TLI, DominatorTree *DT, + bool UnknownDepHint); /// Check if the structure of the loop allows it to be analyzed by this /// pass. @@ -735,7 +737,7 @@ /// Query the result of the loop access information for the loop \p L. /// /// If there is no cached result available run the analysis. - const LoopAccessInfo &getInfo(Loop *L); + const LoopAccessInfo &getInfo(Loop *L, bool UnknownDepHint = false); void releaseMemory() override { // Invalidate the cache when the pass is freed. Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h =================================================================== --- llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -44,7 +44,7 @@ /// careful NOT to add them if the user hasn't specifically asked so. class LoopVectorizeHints { enum HintKind { HK_WIDTH, HK_UNROLL, HK_FORCE, HK_ISVECTORIZED, - HK_PREDICATE }; + HK_PREDICATE, HK_IVDEP }; /// Hint - associates name and validation with the hint value. struct Hint { @@ -73,6 +73,9 @@ /// Vector Predicate Hint Predicate; + /// Ignore Vector dependencies + Hint Ivdep; + /// Return the loop metadata prefix. 
static StringRef Prefix() { return "llvm.loop."; } @@ -102,6 +105,7 @@ unsigned getInterleave() const { return Interleave.Value; } unsigned getIsVectorized() const { return IsVectorized.Value; } unsigned getPredicate() const { return Predicate.Value; } + unsigned getIvdep() const { return Ivdep.Value; } enum ForceKind getForce() const { if ((ForceKind)Force.Value == FK_Undefined && hasDisableAllTransformsHint(TheLoop)) @@ -199,7 +203,7 @@ LoopVectorizationLegality( Loop *L, PredicatedScalarEvolution &PSE, DominatorTree *DT, TargetTransformInfo *TTI, TargetLibraryInfo *TLI, AliasAnalysis *AA, - Function *F, std::function *GetLAA, + Function *F, std::function *GetLAA, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC) @@ -405,7 +409,7 @@ DominatorTree *DT; // LoopAccess analysis. - std::function *GetLAA; + std::function *GetLAA; // And the loop-accesses info corresponding to this loop. This pointer is // null until canVectorizeMemory sets it up. 
Index: llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h =================================================================== --- llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h +++ llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h @@ -138,7 +138,7 @@ DemandedBits *DB; AliasAnalysis *AA; AssumptionCache *AC; - std::function *GetLAA; + std::function *GetLAA; OptimizationRemarkEmitter *ORE; ProfileSummaryInfo *PSI; @@ -149,7 +149,7 @@ TargetTransformInfo &TTI_, DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_, DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_, - std::function &GetLAA_, + std::function &GetLAA_, OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_); bool processLoop(Loop *L); Index: llvm/lib/Analysis/LoopAccessAnalysis.cpp =================================================================== --- llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1633,10 +1633,12 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, - const ValueToValueMap &Strides) { + const ValueToValueMap &Strides, + bool UnknownDepHint) { MaxSafeDepDistBytes = -1; SmallPtrSet Visited; + Status = VectorizationSafetyStatus::Safe; for (MemAccessInfo CurAccess : CheckDeps) { if (Visited.count(CurAccess)) continue; @@ -1678,7 +1680,13 @@ Dependence::DepType Type = isDependent(*A.first, A.second, *B.first, B.second, Strides); - mergeInStatus(Dependence::isSafeForVectorization(Type)); + // Update safety status depending on whether the Dependence type + // is safe. If Unknown Dependence type is to be considered safe, + // do not update safety status. + if (!UnknownDepHint || + !(Dependence::isSafeForVectorization(Type) == + VectorizationSafetyStatus::PossiblySafeWithRtChecks)) + mergeInStatus(Dependence::isSafeForVectorization(Type)); // Gather dependences unless we accumulated MaxDependences // dependences. 
In that case return as soon as we find the first @@ -1788,7 +1796,8 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI, const TargetLibraryInfo *TLI, - DominatorTree *DT) { + DominatorTree *DT, + bool UnknownDepHint) { typedef SmallPtrSet ValueSet; // Holds the Load and Store instructions. @@ -2022,7 +2031,8 @@ if (Accesses.isDependencyCheckNeeded()) { LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); CanVecMem = DepChecker->areDepsSafe( - DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides); + DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides, + UnknownDepHint); MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes(); if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) { @@ -2343,7 +2353,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, - DominatorTree *DT, LoopInfo *LI) + DominatorTree *DT, LoopInfo *LI, + bool UnknownDepHint) : PSE(std::make_unique(*SE, *L)), PtrRtChecking(std::make_unique(SE)), DepChecker(std::make_unique(*PSE, L)), TheLoop(L), @@ -2351,7 +2362,7 @@ HasConvergentOp(false), HasDependenceInvolvingLoopInvariantAddress(false) { if (canAnalyzeLoop()) - analyzeLoop(AA, LI, TLI, DT); + analyzeLoop(AA, LI, TLI, DT, UnknownDepHint); } void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { @@ -2397,11 +2408,13 @@ PSE->print(OS, Depth); } -const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) { +const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L, + bool UnknownDepHint) { auto &LAI = LoopAccessInfoMap[L]; if (!LAI) - LAI = std::make_unique(L, SE, TLI, AA, DT, LI); + LAI = std::make_unique(L, SE, TLI, AA, DT, LI, + UnknownDepHint); return *LAI.get(); } Index: llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ 
llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -61,6 +61,8 @@ case HK_ISVECTORIZED: case HK_PREDICATE: return (Val == 0 || Val == 1); + case HK_IVDEP: + return (Val == 1); } return false; } @@ -72,7 +74,8 @@ Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), Force("vectorize.enable", FK_Undefined, HK_FORCE), IsVectorized("isvectorized", 0, HK_ISVECTORIZED), - Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L), + Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), + Ivdep("vectorize.ivdep.enable", 0, HK_IVDEP), TheLoop(L), ORE(ORE) { // Populate values with existing loop metadata. getHintsFromMetadata(); @@ -224,7 +227,8 @@ return; unsigned Val = C->getZExtValue(); - Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; + Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate, + &Ivdep}; for (auto H : Hints) { if (Name == H->Name) { if (H->validate(Val)) @@ -825,7 +829,7 @@ } bool LoopVectorizationLegality::canVectorizeMemory() { - LAI = &(*GetLAA)(*TheLoop); + LAI = &(*GetLAA)(*TheLoop, Hints->getIvdep()); const OptimizationRemarkAnalysis *LAR = LAI->getReport(); if (LAR) { ORE->emit([&]() { Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp =================================================================== --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -1610,8 +1610,10 @@ auto *ORE = &getAnalysis().getORE(); auto *PSI = &getAnalysis().getPSI(); - std::function GetLAA = - [&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); }; + std::function GetLAA = + [&](Loop &L, bool UnknownDepHint) -> const LoopAccessInfo & + { return LAA-> + getInfo(&L, UnknownDepHint); }; return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC, GetLAA, *ORE, PSI); @@ -7800,7 +7802,7 @@ Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_, DominatorTree &DT_, BlockFrequencyInfo &BFI_, 
TargetLibraryInfo *TLI_, DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_, - std::function &GetLAA_, + std::function &GetLAA_, OptimizationRemarkEmitter &ORE_, ProfileSummaryInfo *PSI_) { SE = &SE_; LI = &LI_; @@ -7879,8 +7881,8 @@ : nullptr; auto &LAM = AM.getResult(F).getManager(); - std::function GetLAA = - [&](Loop &L) -> const LoopAccessInfo & { + std::function GetLAA = + [&](Loop &L, bool UnknownDepHint) -> const LoopAccessInfo & { LoopStandardAnalysisResults AR = {AA, AC, DT, LI, SE, TLI, TTI, MSSA}; return LAM.getResult(L, AR); }; Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-aliasing.ll =================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/ivdep-aliasing.ll @@ -0,0 +1,166 @@ +; RUN: opt < %s -O3 -S | FileCheck %s +; ModuleID = 'ivdeptest.c' +source_filename = "ivdeptest.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1 +@.str.1 = private unnamed_addr constant [6 x i8] c"%d %d\00", align 1 +@.str.2 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind uwtable +define dso_local i32* @addLoops(i32* noalias %a, i32* noalias %b, i32 %LEN_1D) #0 { +entry: + %a.addr = alloca i32*, align 8 + %b.addr = alloca i32*, align 8 + %LEN_1D.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32* %a, i32** %a.addr, align 8 + store i32* %b, i32** %b.addr, align 8 + store i32 %LEN_1D, i32* %LEN_1D.addr, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +; CHECK: vector.ph: +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %1 = load i32, i32* %LEN_1D.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end +; CHECK: br label %vector.body + +; CHECK: vector.body: +for.body: ; preds = %for.cond + %2 = load i32*, 
i32** %a.addr, align 8 + %3 = load i32*, i32** %b.addr, align 8 + %4 = load i32, i32* %i, align 4 + %idxprom = sext i32 %4 to i64 + %arrayidx = getelementptr inbounds i32, i32* %3, i64 %idxprom + %5 = load i32, i32* %arrayidx, align 4 + %idxprom1 = sext i32 %5 to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %2, i64 %idxprom1 + %6 = load i32, i32* %arrayidx2, align 4 + %inc = add nsw i32 %6, 1 + store i32 %inc, i32* %arrayidx2, align 4 + br label %for.inc +; CHECK: br i1 %24, label %middle.block, label %vector.body, !llvm.loop !2 + +for.inc: ; preds = %for.body + %7 = load i32, i32* %i, align 4 + %inc3 = add nsw i32 %7, 1 + store i32 %inc3, i32* %i, align 4 + br label %for.cond, !llvm.loop !2 + +for.end: ; preds = %for.cond + %8 = load i32*, i32** %a.addr, align 8 + ret i32* %8 +} + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @main() #0 { +entry: + %retval = alloca i32, align 4 + %LEN_1D = alloca i32, align 4 + %i = alloca i32, align 4 + %a = alloca i32*, align 8 + %b = alloca i32*, align 8 + %c = alloca i32*, align 8 + store i32 0, i32* %retval, align 4 + %call = call i32 (i8*, ...) 
@__isoc99_scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %LEN_1D) + %0 = load i32, i32* %LEN_1D, align 4 + %conv = sext i32 %0 to i64 + %mul = mul i64 %conv, 4 + %call1 = call noalias i8* @malloc(i64 %mul) #3 + %1 = bitcast i8* %call1 to i32* + store i32* %1, i32** %a, align 8 + %2 = load i32, i32* %LEN_1D, align 4 + %conv2 = sext i32 %2 to i64 + %mul3 = mul i64 %conv2, 4 + %call4 = call noalias i8* @malloc(i64 %mul3) #3 + %3 = bitcast i8* %call4 to i32* + store i32* %3, i32** %b, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %4 = load i32, i32* %i, align 4 + %5 = load i32, i32* %LEN_1D, align 4 + %cmp = icmp slt i32 %4, %5 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %6 = load i32*, i32** %a, align 8 + %7 = load i32, i32* %i, align 4 + %idxprom = sext i32 %7 to i64 + %arrayidx = getelementptr inbounds i32, i32* %6, i64 %idxprom + %8 = load i32*, i32** %b, align 8 + %9 = load i32, i32* %i, align 4 + %idxprom6 = sext i32 %9 to i64 + %arrayidx7 = getelementptr inbounds i32, i32* %8, i64 %idxprom6 + %call8 = call i32 (i8*, ...) 
@__isoc99_scanf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i64 0, i64 0), i32* %arrayidx, i32* %arrayidx7) + br label %for.inc + +for.inc: ; preds = %for.body + %10 = load i32, i32* %i, align 4 + %inc = add nsw i32 %10, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %11 = load i32*, i32** %a, align 8 + %12 = load i32*, i32** %b, align 8 + %13 = load i32, i32* %LEN_1D, align 4 + %call9 = call i32* @addLoops(i32* %11, i32* %12, i32 %13) + store i32* %call9, i32** %c, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond10 + +for.cond10: ; preds = %for.inc17, %for.end + %14 = load i32, i32* %i, align 4 + %15 = load i32, i32* %LEN_1D, align 4 + %cmp11 = icmp slt i32 %14, %15 + br i1 %cmp11, label %for.body13, label %for.end19 + +for.body13: ; preds = %for.cond10 + %16 = load i32*, i32** %c, align 8 + %17 = load i32, i32* %i, align 4 + %idxprom14 = sext i32 %17 to i64 + %arrayidx15 = getelementptr inbounds i32, i32* %16, i64 %idxprom14 + %18 = load i32, i32* %arrayidx15, align 4 + %call16 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.2, i64 0, i64 0), i32 %18) + br label %for.inc17 + +for.inc17: ; preds = %for.body13 + %19 = load i32, i32* %i, align 4 + %inc18 = add nsw i32 %19, 1 + store i32 %inc18, i32* %i, align 4 + br label %for.cond10 + +for.end19: ; preds = %for.cond10 + ret i32 0 +} + +declare dso_local i32 @__isoc99_scanf(i8*, ...) #1 + +; Function Attrs: nounwind +declare dso_local noalias i8* @malloc(i64) #2 + +declare dso_local i32 @printf(i8*, ...) 
#1 + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"} +; CHECK: !2 = distinct !{!2, !3} +; CHECK-NEXT: !3 = !{!"llvm.loop.isvectorized", i32 1} +; CHECK-NEXT: !4 = distinct !{!4, !5, !3} +; CHECK-NEXT: !5 = !{!"llvm.loop.unroll.runtime.disable"} +!2 = distinct !{!2, !3, !4} +!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true} +!4 = !{!"llvm.loop.vectorize.enable", i1 true} Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-novec.ll 
=================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/ivdep-novec.ll @@ -0,0 +1,156 @@ +; RUN: opt < %s -O3 -S | FileCheck %s +; ModuleID = 'dep.c' +; Should not vectorize +source_filename = "dep.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1 +@.str.1 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind uwtable +define dso_local i32* @dep(i32* %A, i32 %n) #0 { +entry: + %A.addr = alloca i32*, align 8 + %n.addr = alloca i32, align 4 + %i = alloca i32, align 4 + store i32* %A, i32** %A.addr, align 8 + store i32 %n, i32* %n.addr, align 4 + store i32 1, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %1 = load i32, i32* %n.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %2 = load i32*, i32** %A.addr, align 8 + %3 = load i32, i32* %i, align 4 + %idxprom = sext i32 %3 to i64 + %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom + %4 = load i32, i32* %arrayidx, align 4 + %5 = load i32*, i32** %A.addr, align 8 + %6 = load i32, i32* %i, align 4 + %sub = sub nsw i32 %6, 1 + %idxprom1 = sext i32 %sub to i64 + %arrayidx2 = getelementptr inbounds i32, i32* %5, i64 %idxprom1 + %7 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %4, %7 + %8 = load i32*, i32** %A.addr, align 8 + %9 = load i32, i32* %i, align 4 + %idxprom3 = sext i32 %9 to i64 + %arrayidx4 = getelementptr inbounds i32, i32* %8, i64 %idxprom3 + store i32 %add, i32* %arrayidx4, align 4 + br label %for.inc + +for.inc: ; preds = %for.body + %10 = load i32, i32* %i, align 4 + %inc = add nsw i32 %10, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond, !llvm.loop !2 + +for.end: ; 
preds = %for.cond + %11 = load i32*, i32** %A.addr, align 8 + ret i32* %11 +} + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @main(i32 %argc, i8** %argv) #0 { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + %n = alloca i32, align 4 + %i = alloca i32, align 4 + %A = alloca i32*, align 8 + store i32 0, i32* %retval, align 4 + store i32 %argc, i32* %argc.addr, align 4 + store i8** %argv, i8*** %argv.addr, align 8 + %call = call i32 (i8*, ...) @__isoc99_scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %n) + %0 = load i32, i32* %n, align 4 + %conv = sext i32 %0 to i64 + %mul = mul i64 %conv, 4 + %call1 = call noalias i8* @malloc(i64 %mul) #3 + %1 = bitcast i8* %call1 to i32* + store i32* %1, i32** %A, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %2 = load i32, i32* %i, align 4 + %3 = load i32, i32* %n, align 4 + %cmp = icmp slt i32 %2, %3 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %4 = load i32*, i32** %A, align 8 + %5 = load i32, i32* %i, align 4 + %idxprom = sext i32 %5 to i64 + %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom + %call3 = call i32 (i8*, ...) 
@__isoc99_scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %arrayidx) + br label %for.inc + +for.inc: ; preds = %for.body + %6 = load i32, i32* %i, align 4 + %inc = add nsw i32 %6, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %7 = load i32*, i32** %A, align 8 + %8 = load i32, i32* %n, align 4 + %call4 = call i32* @dep(i32* %7, i32 %8) + store i32* %call4, i32** %A, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond5 + +for.cond5: ; preds = %for.inc12, %for.end + %9 = load i32, i32* %i, align 4 + %10 = load i32, i32* %n, align 4 + %cmp6 = icmp slt i32 %9, %10 + br i1 %cmp6, label %for.body8, label %for.end14 + +for.body8: ; preds = %for.cond5 + %11 = load i32*, i32** %A, align 8 + %12 = load i32, i32* %i, align 4 + %idxprom9 = sext i32 %12 to i64 + %arrayidx10 = getelementptr inbounds i32, i32* %11, i64 %idxprom9 + %13 = load i32, i32* %arrayidx10, align 4 + %call11 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.1, i64 0, i64 0), i32 %13) + br label %for.inc12 + +for.inc12: ; preds = %for.body8 + %14 = load i32, i32* %i, align 4 + %inc13 = add nsw i32 %14, 1 + store i32 %inc13, i32* %i, align 4 + br label %for.cond5 + +for.end14: ; preds = %for.cond5 + ret i32 0 +} + +declare dso_local i32 @__isoc99_scanf(i8*, ...) #1 + +; Function Attrs: nounwind +declare dso_local noalias i8* @malloc(i64) #2 + +declare dso_local i32 @printf(i8*, ...) 
#1 + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"} +; CHECK: !2 = distinct !{!2, !3, !4} +; CHECK-NEXT: !3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true} +; CHECK-NEXT: !4 = !{!"llvm.loop.vectorize.enable", i1 true} +!2 = distinct !{!2, !3, !4} +!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true} +!4 = !{!"llvm.loop.vectorize.enable", i1 true} Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-unkbounds.ll 
=================================================================== --- /dev/null +++ llvm/test/Transforms/LoopVectorize/X86/ivdep-unkbounds.ll @@ -0,0 +1,195 @@ +; RUN: opt < %s -O3 -S | FileCheck %s +; ModuleID = 'unkbounds.c' +source_filename = "unkbounds.c" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + +@.str = private unnamed_addr constant [2 x i8] c"r\00", align 1 +@.str.1 = private unnamed_addr constant [3 x i8] c"%d\00", align 1 +@.str.2 = private unnamed_addr constant [14 x i8] c"Scanned n:%d\0A\00", align 1 +@.str.3 = private unnamed_addr constant [19 x i8] c"Invalid array size\00", align 1 +@.str.4 = private unnamed_addr constant [12 x i8] c"Scanned:%d\0A\00", align 1 +@.str.5 = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1 + +; Function Attrs: noinline nounwind uwtable +define dso_local i32* @doublefirst20(i32* %A) #0 { +; CHECK: entry: +; CHECK: br label %vector.body + +entry: + %A.addr = alloca i32*, align 8 + %i = alloca i32, align 4 + store i32* %A, i32** %A.addr, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %cmp = icmp slt i32 %0, 64 + br i1 %cmp, label %for.body, label %for.end + +; CHECK: vector.body: +for.body: ; preds = %for.cond + %1 = load i32*, i32** %A.addr, align 8 + %2 = load i32, i32* %i, align 4 + %3 = load i32, i32* %i, align 4 + %mul = mul nsw i32 %2, %3 + %idxprom = sext i32 %mul to i64 + %arrayidx = getelementptr inbounds i32, i32* %1, i64 %idxprom + %4 = load i32, i32* %arrayidx, align 4 + %mul1 = mul nsw i32 %4, 2 + store i32 %mul1, i32* %arrayidx, 
align 4 + br label %for.inc +; CHECK: br i1 %23, label %for.end, label %vector.body, !llvm.loop !2 + +for.inc: ; preds = %for.body + %5 = load i32, i32* %i, align 4 + %inc = add nsw i32 %5, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond, !llvm.loop !2 + +for.end: ; preds = %for.cond + %6 = load i32*, i32** %A.addr, align 8 + ret i32* %6 +} + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @main(i32 %argc, i8** %argv) #0 { +entry: + %retval = alloca i32, align 4 + %argc.addr = alloca i32, align 4 + %argv.addr = alloca i8**, align 8 + %n = alloca i32, align 4 + %i = alloca i32, align 4 + %fname = alloca i8*, align 8 + %fptr = alloca %struct._IO_FILE*, align 8 + %A = alloca i32*, align 8 + store i32 0, i32* %retval, align 4 + store i32 %argc, i32* %argc.addr, align 4 + store i8** %argv, i8*** %argv.addr, align 8 + %0 = load i8**, i8*** %argv.addr, align 8 + %arrayidx = getelementptr inbounds i8*, i8** %0, i64 1 + %1 = load i8*, i8** %arrayidx, align 8 + store i8* %1, i8** %fname, align 8 + %2 = load i8*, i8** %fname, align 8 + %call = call %struct._IO_FILE* @fopen(i8* %2, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0)) + store %struct._IO_FILE* %call, %struct._IO_FILE** %fptr, align 8 + %3 = load %struct._IO_FILE*, %struct._IO_FILE** %fptr, align 8 + %cmp = icmp ne %struct._IO_FILE* %3, null + br i1 %cmp, label %if.then, label %if.end25 + +if.then: ; preds = %entry + %4 = load %struct._IO_FILE*, %struct._IO_FILE** %fptr, align 8 + %call1 = call i32 (%struct._IO_FILE*, i8*, ...) @__isoc99_fscanf(%struct._IO_FILE* %4, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.1, i64 0, i64 0), i32* %n) + %5 = load i32, i32* %n, align 4 + %call2 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @.str.2, i64 0, i64 0), i32 %5) + %6 = load i32, i32* %n, align 4 + %cmp3 = icmp slt i32 %6, 4096 + br i1 %cmp3, label %if.then4, label %if.end + +if.then4: ; preds = %if.then + %call5 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([19 x i8], [19 x i8]* @.str.3, i64 0, i64 0)) + store i32 0, i32* %retval, align 4 + br label %if.end25 + +if.end: ; preds = %if.then + %7 = load i32, i32* %n, align 4 + %conv = sext i32 %7 to i64 + %mul = mul i64 %conv, 4 + %call6 = call noalias i8* @malloc(i64 %mul) #3 + %8 = bitcast i8* %call6 to i32* + store i32* %8, i32** %A, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %if.end + %9 = load i32, i32* %i, align 4 + %10 = load i32, i32* %n, align 4 + %cmp7 = icmp slt i32 %9, %10 + br i1 %cmp7, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %11 = load %struct._IO_FILE*, %struct._IO_FILE** %fptr, align 8 + %12 = load i32*, i32** %A, align 8 + %13 = load i32, i32* %i, align 4 + %idxprom = sext i32 %13 to i64 + %arrayidx9 = getelementptr inbounds i32, i32* %12, i64 %idxprom + %call10 = call i32 (%struct._IO_FILE*, i8*, ...) @__isoc99_fscanf(%struct._IO_FILE* %11, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str.1, i64 0, i64 0), i32* %arrayidx9) + %14 = load i32*, i32** %A, align 8 + %15 = load i32, i32* %i, align 4 + %idxprom11 = sext i32 %15 to i64 + %arrayidx12 = getelementptr inbounds i32, i32* %14, i64 %idxprom11 + %16 = load i32, i32* %arrayidx12, align 4 + %call13 = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str.4, i64 0, i64 0), i32 %16) + br label %for.inc + +for.inc: ; preds = %for.body + %17 = load i32, i32* %i, align 4 + %inc = add nsw i32 %17, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + %18 = load i32*, i32** %A, align 8 + %call14 = call i32* @doublefirst20(i32* %18) + store i32* %call14, i32** %A, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond15 + +for.cond15: ; preds = %for.inc22, %for.end + %19 = load i32, i32* %i, align 4 + %20 = load i32, i32* %n, align 4 + %cmp16 = icmp slt i32 %19, %20 + br i1 %cmp16, label %for.body18, label %for.end24 + +for.body18: ; preds = %for.cond15 + %21 = load i32*, i32** %A, align 8 + %22 = load i32, i32* %i, align 4 + %idxprom19 = sext i32 %22 to i64 + %arrayidx20 = getelementptr inbounds i32, i32* %21, i64 %idxprom19 + %23 = load i32, i32* %arrayidx20, align 4 + %call21 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @.str.5, i64 0, i64 0), i32 %23) + br label %for.inc22 + +for.inc22: ; preds = %for.body18 + %24 = load i32, i32* %i, align 4 + %inc23 = add nsw i32 %24, 1 + store i32 %inc23, i32* %i, align 4 + br label %for.cond15 + +for.end24: ; preds = %for.cond15 + store i32 0, i32* %retval, align 4 + br label %if.end25 + +if.end25: ; preds = %if.then4, %for.end24, %entry + %25 = load i32, i32* %retval, align 4 + ret i32 %25 +} + +declare dso_local %struct._IO_FILE* @fopen(i8*, i8*) #1 + +declare dso_local i32 @__isoc99_fscanf(%struct._IO_FILE*, i8*, ...) #1 + +declare dso_local i32 @printf(i8*, ...) 
#1 + +; Function Attrs: nounwind +declare dso_local noalias i8* @malloc(i64) #2 + +attributes #0 = { noinline nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #3 = { nounwind } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"} +; CHECK: !2 = distinct !{!2, !3} +; CHECK-NEXT: !3 = !{!"llvm.loop.isvectorized", i32 1} +!2 = distinct !{!2, !3, !4} +!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true} +!4 = !{!"llvm.loop.vectorize.enable", i1 true} Index: llvm/test/Transforms/LoopVectorize/X86/ivdep-unkdep.ll 
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/X86/ivdep-unkdep.ll
@@ -0,0 +1,145 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+;
+; Regression test for the llvm.loop.vectorize.ivdep.enable loop hint.
+;
+; The second loop in @main (for.cond6 / for.body8 / for.inc13) computes
+;   vla[i] = vla[i + k] * c
+; where k is read with scanf, so the distance between the load and the store
+; is not known at compile time. Its latch carries !llvm.loop !2, which sets
+; both llvm.loop.vectorize.ivdep.enable and llvm.loop.vectorize.enable. The
+; checks below expect a vector preheader and body to be emitted and the loop
+; metadata to be rewritten to llvm.loop.isvectorized.
+;
+; Value numbers and metadata IDs are matched with FileCheck patterns and
+; variables so that unrelated renumbering of the output does not break the test.
+;
+; NOTE(review): the block names vector.ph.new and middle.block.unr-lcssa look
+; like products of later -O3 passes (runtime unrolling) rather than of the
+; vectorizer itself -- confirm, and consider running only the pass under test.
+
+; ModuleID = 'test.cpp'
+source_filename = "test.cpp"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [3 x i8] c"%d\00", align 1
+
+; Function Attrs: noinline norecurse uwtable
+define dso_local i32 @main() #0 {
+entry:
+  %retval = alloca i32, align 4
+  %i = alloca i32, align 4
+  %n = alloca i32, align 4
+  %r = alloca i32, align 4
+  %m = alloca i32, align 4
+  %k = alloca i32, align 4
+  %c = alloca i32, align 4
+  %saved_stack = alloca i8*, align 8
+  %__vla_expr0 = alloca i64, align 8
+  %i5 = alloca i32, align 4
+  store i32 0, i32* %retval, align 4
+  store i32 1, i32* %r, align 4
+  %call = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %n)
+  %0 = load i32, i32* %n, align 4
+  %1 = zext i32 %0 to i64
+  %2 = call i8* @llvm.stacksave()
+  store i8* %2, i8** %saved_stack, align 8
+  %vla = alloca i32, i64 %1, align 16
+  store i64 %1, i64* %__vla_expr0, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+; First loop: read n values into vla with scanf (not annotated).
+for.cond:                                         ; preds = %for.inc, %entry
+  %3 = load i32, i32* %i, align 4
+  %4 = load i32, i32* %n, align 4
+  %cmp = icmp slt i32 %3, %4
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %5 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %5 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %vla, i64 %idxprom
+  %call1 = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %arrayidx)
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %6 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %6, 1
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %call2 = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %k)
+  %call3 = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %m)
+  %call4 = call i32 (i8*, ...) @scanf(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str, i64 0, i64 0), i32* %c)
+  store i32 0, i32* %i5, align 4
+  br label %for.cond6
+
+; Second loop (loop under test): vla[i5] = vla[i5 + k] * c while i5 < m.
+for.cond6:                                        ; preds = %for.inc13, %for.end
+  %7 = load i32, i32* %i5, align 4
+  %8 = load i32, i32* %m, align 4
+  %cmp7 = icmp slt i32 %7, %8
+  br i1 %cmp7, label %for.body8, label %for.end15
+;CHECK: vector.ph
+;CHECK: br i1 %{{.*}}, label %middle.block.unr-lcssa, label %vector.ph.new
+;CHECK: vector.ph.new:
+;CHECK: br label %vector.body
+
+for.body8:                                        ; preds = %for.cond6
+  %9 = load i32, i32* %i5, align 4
+  %10 = load i32, i32* %k, align 4
+  %add = add nsw i32 %9, %10
+  %idxprom9 = sext i32 %add to i64
+  %arrayidx10 = getelementptr inbounds i32, i32* %vla, i64 %idxprom9
+  %11 = load i32, i32* %arrayidx10, align 4
+  %12 = load i32, i32* %c, align 4
+  %mul = mul nsw i32 %11, %12
+  %13 = load i32, i32* %i5, align 4
+  %idxprom11 = sext i32 %13 to i64
+  %arrayidx12 = getelementptr inbounds i32, i32* %vla, i64 %idxprom11
+  store i32 %mul, i32* %arrayidx12, align 4
+  br label %for.inc13
+;CHECK: br i1 %{{.*}}, label %middle.block.unr-lcssa, label %vector.body, !llvm.loop ![[LOOP:[0-9]+]]
+
+for.inc13:                                        ; preds = %for.body8
+  %14 = load i32, i32* %i5, align 4
+  %inc14 = add nsw i32 %14, 1
+  store i32 %inc14, i32* %i5, align 4
+  br label %for.cond6, !llvm.loop !2
+
+for.end15:                                        ; preds = %for.cond6
+  store i32 0, i32* %retval, align 4
+  %15 = load i8*, i8** %saved_stack, align 8
+  call void @llvm.stackrestore(i8* %15)
+  %16 = load i32, i32* %retval, align 4
+  ret i32 %16
+}
+
+declare dso_local i32 @scanf(i8*, ...) #1
+
+; Function Attrs: nounwind
+declare i8* @llvm.stacksave() #2
+
+; Function Attrs: nounwind
+declare void @llvm.stackrestore(i8*) #2
+
+attributes #0 = { noinline norecurse uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 10.0.0 (https://github.com/llvm/llvm-project 8a5bfbe6db2824642bf9a1d27a24c5b6132b244f)"}
+; Loop metadata expected in the output (IDs captured from the latch branch above).
+; CHECK: ![[LOOP]] = distinct !{![[LOOP]], ![[ISVEC:[0-9]+]]}
+; CHECK-NEXT: ![[ISVEC]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK-NEXT: ![[REM:[0-9]+]] = distinct !{![[REM]], ![[UNROLL:[0-9]+]], ![[ISVEC]]}
+; CHECK-NEXT: ![[UNROLL]] = !{!"llvm.loop.unroll.runtime.disable"}
+; Loop hints on the input, attached to the latch branch in for.inc13.
+!2 = distinct !{!2, !3, !4}
+!3 = !{!"llvm.loop.vectorize.ivdep.enable", i1 true}
+!4 = !{!"llvm.loop.vectorize.enable", i1 true}