Changeset View
Standalone View
llvm/test/Transforms/LoopVectorize/loopvectorize-opt-remarks.ll
- This file was added.
; RUN: opt -enable-new-pm=0 -loop-vectorize -analyze -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s | |||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" | |||||
fhahn: please avoid adding tests with `-enable-new-pm=0`. You should be able to move that one to the LoopAccessAnalysis directory.
; void test_unknown_bounds(int* A, int* B, int n) { | |||||
; for(int i = 0; i < n ; ++i) | |||||
; A[i] = A[B[i]] + 1; | |||||
fhahn: Instead of having the C code here, I think it would be more helpful if you explain what this tests in a sentence or 2 with references to the IR, e.g. which memory instruction/GEP has the uncomputable bound and *why*.
; } | |||||
; CHECK: remark: source.c:3:5: loop not vectorized: cannot identify array bounds | |||||
; CHECK: remark: source.c:4:16: loop not vectorized: Unknown array bounds | |||||
; Scalar loop over %indvars.iv starting at 0 and ending when the incremented
; value equals %n. Each iteration loads an i32 index from B[iv], sign-extends
; it, loads A[index], adds 1, and stores the sum to A[iv]. The address of the
; A load therefore depends on a value read from memory inside the loop.
define void @test_unknown_bounds(i32* nocapture %A, i32* nocapture readonly %B, i64 %n) !dbg !13 { | |||||
entry: | |||||
Can we reduce the unit tests to only what's required? alban.bridonneau: Can we reduce the unit tests to only what's required?
I believe the function attributes like #0… | |||||
Thanks for this suggestion. malharJ: Thanks for this suggestion.
| |||||
Although I've done it now, is there some automated way/script to perform this ? malharJ: Although I've done it now, is there some automated way/script to perform this ?
It took me a… | |||||
Not Done ReplyInline ActionsI am not aware of any tool to do this cleanup alban.bridonneau: I am not aware of any tool to do this cleanup | |||||
; Guard: the loop body is entered only when %n > 0 (signed).
%cmp10 = icmp sgt i64 %n, 0 | |||||
br i1 %cmp10, label %for.body, label %for.cond.cleanup | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
for.body: ; preds = %entry, %for.body | |||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | |||||
; %0 = B[iv]
%arrayidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv | |||||
%0 = load i32, i32* %arrayidx, align 4, !tbaa !31 | |||||
This kind of line can also be cleaned. Keep the loop control minimal, so that we can focus on the memory dependencies. alban.bridonneau: This kind of line can also be cleaned. Keep the loop control minimal, so that we can focus on… | |||||
Done. malharJ: Done. | |||||
; %1 = A[sext(%0)]; this GEP and load carry !dbg !35 (source.c line 4, column 16),
; the location named in the second CHECK line for this function.
%idxprom1 = sext i32 %0 to i64, !dbg !35 | |||||
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %idxprom1, !dbg !35 | |||||
%1 = load i32, i32* %arrayidx2, align 4, !dbg !35, !tbaa !31 | |||||
alban.bridonneauUnsubmitted Your tests all use the same types, I believe tbaa is not how the aliasing issues are detected, and you can remove those nodes alban.bridonneau: Your tests all use the same types, I believe tbaa is not how the aliasing issues are detected… | |||||
; A[iv] = %1 + 1
%add = add nsw i32 %1, 1 | |||||
%arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv | |||||
store i32 %add, i32* %arrayidx4, align 4, !tbaa !31 | |||||
; Advance the induction variable by 1 and exit once it equals %n.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !40 | |||||
} | |||||
; // a) Dependence::NoDep | |||||
; // The only accesses to a common array, A[i-1] and A[i+2], are both reads
; // (the stores go to B and C), which does not hinder vectorization
alban.bridonneau: I am not sure I understand this test. The description says the loop contains only reads, but the IR has stores in it.
malharJ: My bad here.
Updated the patch with the scalar version for the three cases (nodep, forward…
; void test_nodep(int n, int* A, int* B, int* C) { | |||||
; for(int i = 1; i < n ; ++i) { | |||||
; B[i] = A[i-1]; | |||||
; C[i] = A[i+2]; | |||||
; } | |||||
; } | |||||
; CHECK: remark: source.c:15:3: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized | |||||
; Copies A[i-1] to B[i] and A[i+2] to C[i] for i starting at 1.
; The IR in this function is already in vectorized form: it contains a
; vector.memcheck block, a vector.body that handles 8 elements per iteration
; (two <4 x i32> halves per access), and a scalar remainder loop (for.body).
; Both loop back-edges reference loop metadata (!74, !80) that includes
; !76 = "llvm.loop.isvectorized".
define void @test_nodep(i64 %n, i32* nocapture readonly %A, i32* nocapture %B, i32* nocapture %C) !dbg !43 { | |||||
entry: | |||||
; Guard: nothing to do unless %n > 1 (signed).
%cmp14 = icmp sgt i64 %n, 1 | |||||
br i1 %cmp14, label %for.body.preheader, label %for.cond.cleanup, !dbg !57 | |||||
for.body.preheader: ; preds = %entry | |||||
; %0 = n - 1; fall back to the scalar loop when it is below 8.
%0 = add nsw i64 %n, -1, !dbg !57 | |||||
%min.iters.check = icmp ult i64 %0, 8, !dbg !57 | |||||
br i1 %min.iters.check, label %for.body.preheader40, label %vector.memcheck, !dbg !57 | |||||
vector.memcheck: ; preds = %for.body.preheader | |||||
; Pointer bounds used by the runtime overlap checks: B+1, B+n, C+1, C+n, A+n+2.
%scevgep = getelementptr i32, i32* %B, i64 1, !dbg !57 | |||||
%scevgep21 = getelementptr i32, i32* %B, i64 %n, !dbg !57 | |||||
%scevgep23 = getelementptr i32, i32* %C, i64 1, !dbg !57 | |||||
%scevgep25 = getelementptr i32, i32* %C, i64 %n, !dbg !57 | |||||
%1 = add nuw nsw i64 %n, 2, !dbg !57 | |||||
%scevgep28 = getelementptr i32, i32* %A, i64 %1, !dbg !57 | |||||
; B range versus C range.
%bound0 = icmp ult i32* %scevgep, %scevgep25, !dbg !57 | |||||
%bound1 = icmp ult i32* %scevgep23, %scevgep21, !dbg !57 | |||||
%found.conflict = and i1 %bound0, %bound1, !dbg !57 | |||||
; B range versus A range.
%bound030 = icmp ult i32* %scevgep, %scevgep28, !dbg !57 | |||||
%bound131 = icmp ugt i32* %scevgep21, %A, !dbg !57 | |||||
%found.conflict32 = and i1 %bound030, %bound131, !dbg !57 | |||||
%conflict.rdx = or i1 %found.conflict, %found.conflict32, !dbg !57 | |||||
; C range versus A range.
%bound033 = icmp ult i32* %scevgep23, %scevgep28, !dbg !57 | |||||
%bound134 = icmp ugt i32* %scevgep25, %A, !dbg !57 | |||||
%found.conflict35 = and i1 %bound033, %bound134, !dbg !57 | |||||
%conflict.rdx36 = or i1 %conflict.rdx, %found.conflict35, !dbg !57 | |||||
; Any detected overlap sends execution to the scalar loop.
br i1 %conflict.rdx36, label %for.body.preheader40, label %vector.ph, !dbg !57 | |||||
vector.ph: ; preds = %vector.memcheck | |||||
; %n.vec = (n - 1) rounded down to a multiple of 8; %ind.end is the scalar
; loop's starting index after the vector loop finishes.
%n.vec = and i64 %0, -8, !dbg !57 | |||||
%ind.end = or i64 %n.vec, 1, !dbg !57 | |||||
br label %vector.body, !dbg !57 | |||||
vector.body: ; preds = %vector.body, %vector.ph | |||||
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | |||||
%offset.idx = or i64 %index, 1 | |||||
; Two <4 x i32> loads starting at A[index] ...
%2 = getelementptr inbounds i32, i32* %A, i64 %index | |||||
%3 = bitcast i32* %2 to <4 x i32>* | |||||
%wide.load = load <4 x i32>, <4 x i32>* %3, align 4, !tbaa !31, !alias.scope !60 | |||||
%4 = getelementptr inbounds i32, i32* %2, i64 4 | |||||
%5 = bitcast i32* %4 to <4 x i32>* | |||||
%wide.load37 = load <4 x i32>, <4 x i32>* %5, align 4, !tbaa !31, !alias.scope !60 | |||||
; ... stored to B starting at B[offset.idx].
%6 = getelementptr inbounds i32, i32* %B, i64 %offset.idx | |||||
%7 = bitcast i32* %6 to <4 x i32>* | |||||
store <4 x i32> %wide.load, <4 x i32>* %7, align 4, !tbaa !31, !alias.scope !65, !noalias !67 | |||||
%8 = getelementptr inbounds i32, i32* %6, i64 4 | |||||
%9 = bitcast i32* %8 to <4 x i32>* | |||||
store <4 x i32> %wide.load37, <4 x i32>* %9, align 4, !tbaa !31, !alias.scope !65, !noalias !67 | |||||
; Two <4 x i32> loads starting at A[index | 3] ...
%10 = or i64 %index, 3 | |||||
%11 = getelementptr inbounds i32, i32* %A, i64 %10 | |||||
%12 = bitcast i32* %11 to <4 x i32>* | |||||
%wide.load38 = load <4 x i32>, <4 x i32>* %12, align 4, !tbaa !31, !alias.scope !60 | |||||
%13 = getelementptr inbounds i32, i32* %11, i64 4 | |||||
%14 = bitcast i32* %13 to <4 x i32>* | |||||
%wide.load39 = load <4 x i32>, <4 x i32>* %14, align 4, !tbaa !31, !alias.scope !60 | |||||
; ... stored to C starting at C[offset.idx].
%15 = getelementptr inbounds i32, i32* %C, i64 %offset.idx | |||||
%16 = bitcast i32* %15 to <4 x i32>* | |||||
store <4 x i32> %wide.load38, <4 x i32>* %16, align 4, !tbaa !31, !alias.scope !73, !noalias !60 | |||||
%17 = getelementptr inbounds i32, i32* %15, i64 4 | |||||
%18 = bitcast i32* %17 to <4 x i32>* | |||||
store <4 x i32> %wide.load39, <4 x i32>* %18, align 4, !tbaa !31, !alias.scope !73, !noalias !60 | |||||
; Vector loop advances by 8 elements per iteration.
%index.next = add nuw i64 %index, 8 | |||||
%19 = icmp eq i64 %index.next, %n.vec | |||||
br i1 %19, label %middle.block, label %vector.body, !llvm.loop !74 | |||||
middle.block: ; preds = %vector.body | |||||
; Skip the scalar remainder when the vector loop covered every iteration.
%cmp.n = icmp eq i64 %0, %n.vec, !dbg !57 | |||||
br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader40, !dbg !57 | |||||
for.body.preheader40: ; preds = %vector.memcheck, %for.body.preheader, %middle.block | |||||
; Scalar loop start index: 1 when the vector loop was bypassed, else %ind.end.
%indvars.iv.ph = phi i64 [ 1, %vector.memcheck ], [ 1, %for.body.preheader ], [ %ind.end, %middle.block ] | |||||
br label %for.body, !dbg !57 | |||||
for.cond.cleanup: ; preds = %for.body, %middle.block, %entry | |||||
ret void | |||||
for.body: ; preds = %for.body.preheader40, %for.body | |||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader40 ] | |||||
; B[iv] = A[iv - 1]
%20 = add nsw i64 %indvars.iv, -1 | |||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %20 | |||||
%21 = load i32, i32* %arrayidx, align 4, !tbaa !31 | |||||
%arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv | |||||
store i32 %21, i32* %arrayidx2, align 4, !tbaa !31 | |||||
; C[iv] = A[iv + 2]
%22 = add nuw nsw i64 %indvars.iv, 2 | |||||
%arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %22 | |||||
%23 = load i32, i32* %arrayidx4, align 4, !tbaa !31 | |||||
%arrayidx6 = getelementptr inbounds i32, i32* %C, i64 %indvars.iv | |||||
store i32 %23, i32* %arrayidx6, align 4, !tbaa !31 | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !57, !llvm.loop !80 | |||||
} | |||||
; // b) Dependence::Forward | |||||
; // Loop gets vectorized since it contains only a forward | |||||
; // dependency between A[i-2] and A[i] | |||||
; void test_forward(int n, int* A, int* B) { | |||||
; for(int i=1; i < n; ++i) { | |||||
; A[i] = 10; | |||||
; B[i] = A[i-2]; | |||||
; } | |||||
; } | |||||
; CHECK: remark: source.c:25:3: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized | |||||
; Stores 10 to A[i] and copies A[i-2] to B[i] for i starting at 1.
; The IR is already in vectorized form: a vector.memcheck block, a vector.body
; that handles 4 elements per iteration, and a scalar remainder loop
; (for.body). Both loop back-edges reference loop metadata (!107, !111) that
; includes !76 = "llvm.loop.isvectorized".
define void @test_forward(i64 %n, i32* nocapture %A, i32* nocapture %B) !dbg !81 { | |||||
entry: | |||||
; Guard: nothing to do unless %n > 1 (signed).
%cmp11 = icmp sgt i64 %n, 1 | |||||
br i1 %cmp11, label %for.body.preheader, label %for.cond.cleanup, !dbg !94 | |||||
for.body.preheader: ; preds = %entry | |||||
; %0 = n - 1; fall back to the scalar loop when it is below 4.
%0 = add nsw i64 %n, -1, !dbg !94 | |||||
%min.iters.check = icmp ult i64 %0, 4, !dbg !94 | |||||
br i1 %min.iters.check, label %for.body.preheader22, label %vector.memcheck, !dbg !94 | |||||
vector.memcheck: ; preds = %for.body.preheader | |||||
; Runtime overlap check between the range [A-1, A+n) and the range [B+1, B+n).
%scevgep = getelementptr i32, i32* %A, i64 -1, !dbg !94 | |||||
%scevgep16 = getelementptr i32, i32* %A, i64 %n, !dbg !94 | |||||
%scevgep18 = getelementptr i32, i32* %B, i64 1, !dbg !94 | |||||
%scevgep20 = getelementptr i32, i32* %B, i64 %n, !dbg !94 | |||||
%bound0 = icmp ult i32* %scevgep, %scevgep20, !dbg !94 | |||||
%bound1 = icmp ult i32* %scevgep18, %scevgep16, !dbg !94 | |||||
%found.conflict = and i1 %bound0, %bound1, !dbg !94 | |||||
br i1 %found.conflict, label %for.body.preheader22, label %vector.ph, !dbg !94 | |||||
vector.ph: ; preds = %vector.memcheck | |||||
; %n.vec = (n - 1) rounded down to a multiple of 4.
%n.vec = and i64 %0, -4, !dbg !94 | |||||
%ind.end = or i64 %n.vec, 1, !dbg !94 | |||||
br label %vector.body, !dbg !94 | |||||
vector.body: ; preds = %vector.body, %vector.ph | |||||
alban.bridonneauUnsubmitted same here, the IR is already vectorized. For such a patch i would have expected all loops to be scalar, as before entering the Loop Vectorizer alban.bridonneau: same here, the IR is already vectorized. For such a patch i would have expected all loops to be… | |||||
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | |||||
%offset.idx = or i64 %index, 1 | |||||
; Store a splat of 10 to four elements starting at A[offset.idx].
%1 = getelementptr inbounds i32, i32* %A, i64 %offset.idx | |||||
%2 = bitcast i32* %1 to <4 x i32>* | |||||
store <4 x i32> <i32 10, i32 10, i32 10, i32 10>, <4 x i32>* %2, align 4, !tbaa !31, !alias.scope !98, !noalias !101 | |||||
; Load four elements starting at A[index - 1] ...
%3 = add i64 %index, -1 | |||||
%4 = getelementptr inbounds i32, i32* %A, i64 %3 | |||||
%5 = bitcast i32* %4 to <4 x i32>* | |||||
%wide.load = load <4 x i32>, <4 x i32>* %5, align 4, !tbaa !31, !alias.scope !98, !noalias !101 | |||||
; ... and store them starting at B[offset.idx].
%6 = getelementptr inbounds i32, i32* %B, i64 %offset.idx | |||||
%7 = bitcast i32* %6 to <4 x i32>* | |||||
store <4 x i32> %wide.load, <4 x i32>* %7, align 4, !tbaa !31, !alias.scope !101 | |||||
%index.next = add nuw i64 %index, 4 | |||||
%8 = icmp eq i64 %index.next, %n.vec | |||||
br i1 %8, label %middle.block, label %vector.body, !llvm.loop !107 | |||||
middle.block: ; preds = %vector.body | |||||
; Skip the scalar remainder when the vector loop covered every iteration.
%cmp.n = icmp eq i64 %0, %n.vec, !dbg !94 | |||||
br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader22, !dbg !94 | |||||
for.body.preheader22: ; preds = %vector.memcheck, %for.body.preheader, %middle.block | |||||
%indvars.iv.ph = phi i64 [ 1, %vector.memcheck ], [ 1, %for.body.preheader ], [ %ind.end, %middle.block ] | |||||
br label %for.body, !dbg !94 | |||||
for.cond.cleanup: ; preds = %for.body, %middle.block, %entry | |||||
ret void | |||||
for.body: ; preds = %for.body.preheader22, %for.body | |||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader22 ] | |||||
; A[iv] = 10
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv | |||||
store i32 10, i32* %arrayidx, align 4, !tbaa !31 | |||||
; B[iv] = A[iv - 2]
%9 = add nsw i64 %indvars.iv, -2 | |||||
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %9 | |||||
%10 = load i32, i32* %arrayidx2, align 4, !tbaa !31 | |||||
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv | |||||
store i32 %10, i32* %arrayidx4, align 4, !tbaa !31 | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !94, !llvm.loop !111 | |||||
} | |||||
; // c) Dependence::BackwardVectorizable | |||||
; // Loop gets vectorized since it contains a backward dependency | |||||
; // between A[i] and A[i-4], but the dependency distance (4) is | |||||
; // greater than the minimum possible VF (2 in this case) | |||||
; void test_backwardVectorizable(int n, int* A, int* B) { | |||||
; for(int i=4; i < n; ++i) { | |||||
; A[i] = A[i-4] + 1; | |||||
; } | |||||
; } | |||||
; CHECK: remark: source.c:37:3: loop not vectorized: vectorization and interleaving are explicitly disabled, or the loop has already been vectorized | |||||
; Computes A[i] = A[i-4] + 1 for i starting at 4; %B is unused (readnone).
; The IR is already in vectorized form: a vector.body handling 4 elements per
; iteration plus a scalar remainder loop (for.body). There is no runtime
; memcheck block in this function. Both loop back-edges reference loop
; metadata (!128, !134) that includes !76 = "llvm.loop.isvectorized".
define void @test_backwardVectorizable(i64 %n, i32* nocapture %A, i32* nocapture readnone %B) !dbg !112 { | |||||
entry: | |||||
; View of A as a <4 x i32> pointer, used for the initial vector load below.
%A11 = bitcast i32* %A to <4 x i32>* | |||||
%cmp8 = icmp sgt i64 %n, 4 | |||||
br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup | |||||
for.body.preheader: ; preds = %entry | |||||
; %0 = n - 4; fall back to the scalar loop when it is below 4.
%0 = add nsw i64 %n, -4 | |||||
%min.iters.check = icmp ult i64 %0, 4 | |||||
br i1 %min.iters.check, label %for.body.preheader12, label %vector.ph | |||||
vector.ph: ; preds = %for.body.preheader | |||||
%n.vec = and i64 %0, -4 | |||||
%ind.end = add nsw i64 %n.vec, 4 | |||||
; Load the first four elements of A once, before the vector loop.
%load_initial = load <4 x i32>, <4 x i32>* %A11, align 4 | |||||
br label %vector.body | |||||
vector.body: ; preds = %vector.body, %vector.ph | |||||
; %store_forwarded carries the vector stored in the previous iteration
; (or the initial load on the first iteration), so the body has no load.
%store_forwarded = phi <4 x i32> [ %load_initial, %vector.ph ], [ %1, %vector.body ] | |||||
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] | |||||
%offset.idx = add i64 %index, 4 | |||||
; Add 1 to each lane and store four elements starting at A[index + 4].
%1 = add nsw <4 x i32> %store_forwarded, <i32 1, i32 1, i32 1, i32 1> | |||||
%2 = getelementptr inbounds i32, i32* %A, i64 %offset.idx | |||||
%3 = bitcast i32* %2 to <4 x i32>* | |||||
store <4 x i32> %1, <4 x i32>* %3, align 4, !tbaa !31 | |||||
%index.next = add nuw i64 %index, 4 | |||||
%4 = icmp eq i64 %index.next, %n.vec | |||||
br i1 %4, label %middle.block, label %vector.body, !llvm.loop !128 | |||||
middle.block: ; preds = %vector.body | |||||
; Skip the scalar remainder when the vector loop covered every iteration.
%cmp.n = icmp eq i64 %0, %n.vec | |||||
br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader12 | |||||
for.body.preheader12: ; preds = %for.body.preheader, %middle.block | |||||
; Scalar loop start index: 4 when the vector loop was bypassed, else %ind.end.
%indvars.iv.ph = phi i64 [ 4, %for.body.preheader ], [ %ind.end, %middle.block ] | |||||
br label %for.body | |||||
for.cond.cleanup: ; preds = %for.body, %middle.block, %entry | |||||
ret void | |||||
for.body: ; preds = %for.body.preheader12, %for.body | |||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader12 ] | |||||
; A[iv] = A[iv - 4] + 1
%5 = add nsw i64 %indvars.iv, -4 | |||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %5 | |||||
%6 = load i32, i32* %arrayidx, align 4, !tbaa !31 | |||||
%add = add nsw i32 %6, 1 | |||||
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv | |||||
store i32 %add, i32* %arrayidx2, align 4, !tbaa !31 | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !134 | |||||
} | |||||
; // d) Dependence::Backward | |||||
; // Loop does not get vectorized since it contains a backward | |||||
; // dependency between A[i] and A[i+3]. | |||||
; void test_backward_dep(int n, double *A) { | |||||
; for (int i = 1; i <= n - 3; i += 3) { | |||||
; A[i] = A[i-1]; | |||||
; A[i+1] = A[i+3]; | |||||
; } | |||||
; } | |||||
; CHECK: remark: source.c:48:14: loop not vectorized: Backward loop carried data dependence. Memory location is the same as accessed at line 47:5 | |||||
; Scalar loop over double* %A with the induction variable starting at 1 and
; stepping by 3. Each iteration performs A[iv] = A[iv-1] and
; A[iv+1] = A[iv+3].
; NOTE(review): unlike the other functions in this file, this define carries
; no !dbg attachment, although metadata node !136 is a DISubprogram named
; "test_backward_dep" -- confirm whether the attachment was dropped on purpose.
define void @test_backward_dep(i32 %n, double* nocapture %A) { | |||||
entry: | |||||
; Guard: the loop is skipped when %n < 4 (signed).
%cmp.not19 = icmp slt i32 %n, 4 | |||||
br i1 %cmp.not19, label %for.cond.cleanup, label %for.body.preheader | |||||
for.body.preheader: ; preds = %entry | |||||
; %0 = zext(n - 3), the bound compared against in the exit test.
%sub = add nsw i32 %n, -3 | |||||
%0 = zext i32 %sub to i64 | |||||
br label %for.body | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
for.body: ; preds = %for.body.preheader, %for.body | |||||
%indvars.iv = phi i64 [ 1, %for.body.preheader ], [ %indvars.iv.next, %for.body ] | |||||
; A[iv] = A[iv - 1]; the store address GEP carries !dbg !157 (line 47, column 5).
%1 = add nsw i64 %indvars.iv, -1 | |||||
%arrayidx = getelementptr inbounds double, double* %A, i64 %1 | |||||
%2 = load double, double* %arrayidx, align 8, !tbaa !155 | |||||
%arrayidx3 = getelementptr inbounds double, double* %A, i64 %indvars.iv, !dbg !157 | |||||
store double %2, double* %arrayidx3, align 8, !tbaa !155 | |||||
; %indvars.iv.next = iv + 3 doubles as the index of the second load, A[iv + 3];
; that GEP carries !dbg !160 (line 48, column 14).
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 3 | |||||
%arrayidx5 = getelementptr inbounds double, double* %A, i64 %indvars.iv.next, !dbg !160 | |||||
%3 = load double, double* %arrayidx5, align 8, !tbaa !155 | |||||
; A[iv + 1] = A[iv + 3]
%4 = add nuw nsw i64 %indvars.iv, 1 | |||||
%arrayidx8 = getelementptr inbounds double, double* %A, i64 %4 | |||||
store double %3, double* %arrayidx8, align 8, !tbaa !155 | |||||
; Exit once iv + 3 exceeds zext(n - 3) (unsigned compare).
%cmp.not = icmp ugt i64 %indvars.iv.next, %0 | |||||
br i1 %cmp.not, label %for.cond.cleanup, label %for.body | |||||
} | |||||
; // e) Dependence::ForwardButPreventsForwarding | |||||
; // Loop does not get vectorized despite only having a forward | |||||
; // dependency between A[i] and A[i-3]. | |||||
; // This is because the store-to-load forwarding distance (here 3)
; // needs to be a multiple of the vectorization factor; otherwise the
; // store (A[5:6] in i=5) and the loads (A[4:5], A[6:7] in i=7,9) are unaligned.
; void test_forwardPreventsForwarding_dep(int* A, int* B, int n) { | |||||
; for(int i=3; i < n; ++i) { | |||||
; A[i] = 10; | |||||
; B[i] = A[i-3]; | |||||
; } | |||||
; } | |||||
; CHECK: remark: source.c:61:12: loop not vectorized: Forward loop carried data dependence that prevents store-to-load forwarding. Memory location is the same as accessed at line 60:5 | |||||
; Scalar loop with the induction variable starting at 3 and stepping by 1
; until it equals %n. Each iteration stores 10 to A[iv], then loads A[iv-3]
; and stores that value to B[iv].
define void @test_forwardPreventsForwarding_dep(i32* nocapture %A, i32* nocapture %B, i64 %n) !dbg !166 { | |||||
entry: | |||||
; Guard: the loop body is entered only when %n > 3 (signed).
%cmp11 = icmp sgt i64 %n, 3 | |||||
br i1 %cmp11, label %for.body, label %for.cond.cleanup | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
for.body: ; preds = %entry, %for.body | |||||
%indvars.iv = phi i64 [ 3, %entry ], [ %indvars.iv.next, %for.body ] | |||||
; A[iv] = 10; the address GEP carries !dbg !179 (line 60, column 5).
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !179 | |||||
store i32 10, i32* %arrayidx, align 4, !tbaa !31 | |||||
; %1 = A[iv - 3]; the address GEP carries !dbg !183 (line 61, column 12).
%0 = add nsw i64 %indvars.iv, -3 | |||||
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !183 | |||||
%1 = load i32, i32* %arrayidx2, align 4, !tbaa !31 | |||||
; B[iv] = %1
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv | |||||
store i32 %1, i32* %arrayidx4, align 4, !tbaa !31 | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body | |||||
} | |||||
; // f) Dependence::BackwardVectorizableButPreventsForwarding | |||||
; // Loop does not get vectorized despite having a backward | |||||
; // but vectorizable dependency between A[i] and A[i-15]. | |||||
; // | |||||
; // This is because the store-to-load forwarding distance (here 15)
; // needs to be a multiple of the vectorization factor; otherwise the
; // store (A[16:17] in i=16) and the loads (A[15:16], A[17:18] in i=30,32) are unaligned.
; void test_backwardVectorizableButPreventsForwarding(int* A, int n) { | |||||
; for(int i=15; i < n; ++i) { | |||||
; A[i] = A[i-2] + A[i-15]; | |||||
; } | |||||
; } | |||||
; CHECK: remark: source.c:74:5: loop not vectorized: Backward loop carried data dependence that prevents store-to-load forwarding. Memory location is the same as accessed at line 74:21 | |||||
; Scalar loop with the induction variable starting at 15 and stepping by 1
; until it equals %n. Each iteration computes A[iv] = A[iv-15] + A[iv-2].
define void @test_backwardVectorizableButPreventsForwarding(i32* nocapture %A, i64 %n) !dbg !189 { | |||||
entry: | |||||
; Guard: the loop body is entered only when %n > 15 (signed).
%cmp13 = icmp sgt i64 %n, 15 | |||||
br i1 %cmp13, label %for.body, label %for.cond.cleanup | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
for.body: ; preds = %entry, %for.body | |||||
%indvars.iv = phi i64 [ 15, %entry ], [ %indvars.iv.next, %for.body ] | |||||
; %1 = A[iv - 2]
%0 = add nsw i64 %indvars.iv, -2 | |||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %0 | |||||
%1 = load i32, i32* %arrayidx, align 4, !tbaa !31 | |||||
; %3 = A[iv - 15]; the address GEP carries !dbg !207 (line 74, column 21).
%2 = add nsw i64 %indvars.iv, -15 | |||||
%arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %2, !dbg !207 | |||||
%3 = load i32, i32* %arrayidx3, align 4, !tbaa !31 | |||||
; A[iv] = %3 + %1; the address GEP carries !dbg !209 (line 74, column 5).
%add = add nsw i32 %3, %1 | |||||
%arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !209 | |||||
store i32 %add, i32* %arrayidx5, align 4, !tbaa !31 | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body | |||||
} | |||||
; // g) Dependence::Unknown | |||||
; // Different stride lengths | |||||
; void test_unknown_dep(int* A, int n) { | |||||
; for(int i=0; i < n; ++i) { | |||||
; A[(i+1)*4] = 10; | |||||
; A[i] = 100; | |||||
; } | |||||
; } | |||||
; CHECK: remark: source.c:83:7: loop not vectorized: Unknown data dependence. Memory location is the same as accessed at line 82:7 | |||||
; Scalar loop with the induction variable starting at 0 and stepping by 1
; until the incremented value equals %n. Each iteration stores 10 to
; A[(iv+1) << 2] (i.e. A[(iv+1)*4]) and 100 to A[iv], so the two stores walk
; A with different strides.
define void @test_unknown_dep(i32* nocapture %A, i64 %n) !dbg !214 { | |||||
entry: | |||||
; Guard: the loop body is entered only when %n > 0 (signed).
%cmp8 = icmp sgt i64 %n, 0 | |||||
br i1 %cmp8, label %for.body, label %for.cond.cleanup | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
for.body: ; preds = %entry, %for.body | |||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | |||||
; The incremented induction variable is computed first because it also feeds
; the index of the first store.
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
; A[(iv + 1) << 2] = 10; the address GEP carries !dbg !229 (line 82, column 7).
%0 = shl nsw i64 %indvars.iv.next, 2 | |||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !229 | |||||
store i32 10, i32* %arrayidx, align 4, !tbaa !31 | |||||
; A[iv] = 100; the address GEP carries !dbg !231 (line 83, column 7).
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !231 | |||||
store i32 100, i32* %arrayidx2, align 4, !tbaa !31 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body | |||||
} | |||||
; ---- Module-level debug info: compile unit, source file, module flags ----
!llvm.dbg.cu = !{!0} | |||||
!llvm.module.flags = !{!4} | |||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0 (https://github.com/llvm/llvm-project.git 54f0f826c5c7d0ff16c230b259cb6aad33e18d97)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) | |||||
!1 = !DIFile(filename: "source.c", directory: "") | |||||
!2 = !{} | |||||
!4 = !{i32 2, !"Debug Info Version", i32 3} | |||||
; ---- @test_unknown_bounds: subprogram, types, scopes, locations ----
!13 = distinct !DISubprogram(name: "test_unknown_bounds", scope: !1, file: !1, line: 2, type: !14, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !18) | |||||
!14 = !DISubroutineType(types: !15) | |||||
!15 = !{null, !16, !16, !17} | |||||
!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 64) | |||||
!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) | |||||
!18 = !{} | |||||
!23 = distinct !DILexicalBlock(scope: !13, file: !1, line: 3, column: 5) | |||||
!27 = distinct !DILexicalBlock(scope: !23, file: !1, line: 3, column: 5) | |||||
!28 = !DILocation(line: 3, column: 5, scope: !23) | |||||
; TBAA nodes for "int"; !31 is the access tag used by the i32 loads and stores
; in the functions above.
!31 = !{!32, !32, i64 0} | |||||
!32 = !{!"int", !33, i64 0} | |||||
!33 = !{!"omnipotent char", !34, i64 0} | |||||
!34 = !{!"Simple C/C++ TBAA"} | |||||
!35 = !DILocation(line: 4, column: 16, scope: !27) | |||||
; Loop metadata for @test_unknown_bounds; !42 is also referenced by the loop
; metadata of later functions.
!40 = distinct !{!40, !28, !42} | |||||
!42 = !{!"llvm.loop.mustprogress"} | |||||
; ---- @test_nodep: subprogram, scopes, alias scopes, loop metadata ----
!43 = distinct !DISubprogram(name: "test_nodep", scope: !1, file: !1, line: 14, type: !44, scopeLine: 14, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !46) | |||||
!44 = !DISubroutineType(types: !45) | |||||
!45 = !{null, !17, !16, !16, !16} | |||||
!46 = !{} | |||||
!52 = distinct !DILexicalBlock(scope: !43, file: !1, line: 15, column: 3) | |||||
!56 = distinct !DILexicalBlock(scope: !52, file: !1, line: 15, column: 3) | |||||
!57 = !DILocation(line: 15, column: 3, scope: !52) | |||||
; Alias scopes in the "LVerDomain" domain, referenced by !alias.scope and
; !noalias on the vector loads/stores of @test_nodep.
!60 = !{!61} | |||||
!61 = distinct !{!61, !62} | |||||
!62 = distinct !{!62, !"LVerDomain"} | |||||
!65 = !{!66} | |||||
!66 = distinct !{!66, !62} | |||||
!67 = !{!68, !61} | |||||
!68 = distinct !{!68, !62} | |||||
!73 = !{!68} | |||||
; Loop metadata for the vector (!74) and scalar (!80) loops; both include
; !76, the "llvm.loop.isvectorized" marker.
!74 = distinct !{!74, !57, !75, !42, !76} | |||||
!75 = !DILocation(line: 18, column: 3, scope: !52) | |||||
!76 = !{!"llvm.loop.isvectorized", i32 1} | |||||
!80 = distinct !{!80, !57, !75, !42, !76} | |||||
; ---- @test_forward: subprogram, scopes, alias scopes, loop metadata ----
!81 = distinct !DISubprogram(name: "test_forward", scope: !1, file: !1, line: 24, type: !82, scopeLine: 24, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !84) | |||||
!82 = !DISubroutineType(types: !83) | |||||
!83 = !{null, !17, !16, !16} | |||||
!84 = !{} | |||||
!89 = distinct !DILexicalBlock(scope: !81, file: !1, line: 25, column: 3) | |||||
!93 = distinct !DILexicalBlock(scope: !89, file: !1, line: 25, column: 3) | |||||
!94 = !DILocation(line: 25, column: 3, scope: !89) | |||||
!98 = !{!99} | |||||
!99 = distinct !{!99, !100} | |||||
!100 = distinct !{!100, !"LVerDomain"} | |||||
!101 = !{!102} | |||||
!102 = distinct !{!102, !100} | |||||
!107 = distinct !{!107, !94, !108, !42, !76} | |||||
!108 = !DILocation(line: 28, column: 3, scope: !89) | |||||
!111 = distinct !{!111, !94, !108, !42, !76} | |||||
; ---- @test_backwardVectorizable: subprogram, scopes, loop metadata ----
!112 = distinct !DISubprogram(name: "test_backwardVectorizable", scope: !1, file: !1, line: 36, type: !82, scopeLine: 36, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !113) | |||||
!113 = !{} | |||||
!118 = distinct !DILexicalBlock(scope: !112, file: !1, line: 37, column: 3) | |||||
!122 = distinct !DILexicalBlock(scope: !118, file: !1, line: 37, column: 3) | |||||
!123 = !DILocation(line: 37, column: 3, scope: !118) | |||||
!128 = distinct !{!128, !123, !42, !76} | |||||
!134 = distinct !{!134, !123, !42, !135, !76} | |||||
!135 = !{!"llvm.loop.unroll.runtime.disable"} | |||||
; ---- test_backward_dep: subprogram, types, scopes, locations ----
; NOTE(review): !136 is not attached to the @test_backward_dep define in the
; IR above; it is only reached through the scope chain of !157 and !160.
!136 = distinct !DISubprogram(name: "test_backward_dep", scope: !1, file: !1, line: 45, type: !137, scopeLine: 45, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !141) | |||||
!137 = !DISubroutineType(types: !138) | |||||
!138 = !{null, !17, !139} | |||||
!139 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !140, size: 64) | |||||
!140 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) | |||||
!141 = !{} | |||||
!145 = distinct !DILexicalBlock(scope: !136, file: !1, line: 46, column: 3) | |||||
!149 = distinct !DILexicalBlock(scope: !145, file: !1, line: 46, column: 3) | |||||
!153 = distinct !DILexicalBlock(scope: !149, file: !1, line: 46, column: 39) | |||||
; TBAA nodes for "double"; !155 is the access tag used by @test_backward_dep.
!155 = !{!156, !156, i64 0} | |||||
!156 = !{!"double", !33, i64 0} | |||||
!157 = !DILocation(line: 47, column: 5, scope: !153) | |||||
!160 = !DILocation(line: 48, column: 14, scope: !153) | |||||
; ---- @test_forwardPreventsForwarding_dep: subprogram, scopes, locations ----
; Of the locations below, only !179 and !183 are referenced by the IR above.
!166 = distinct !DISubprogram(name: "test_forwardPreventsForwarding_dep", scope: !1, file: !1, line: 58, type: !14, scopeLine: 58, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !167) | |||||
!167 = !{} | |||||
!172 = distinct !DILexicalBlock(scope: !166, file: !1, line: 59, column: 3) | |||||
!175 = !DILocation(line: 59, column: 18, scope: !176) | |||||
!176 = distinct !DILexicalBlock(scope: !172, file: !1, line: 59, column: 3) | |||||
!177 = !DILocation(line: 59, column: 3, scope: !172) | |||||
!178 = !DILocation(line: 63, column: 1, scope: !166) | |||||
!179 = !DILocation(line: 60, column: 5, scope: !180) | |||||
!180 = distinct !DILexicalBlock(scope: !176, file: !1, line: 59, column: 28) | |||||
!181 = !DILocation(line: 60, column: 10, scope: !180) | |||||
!182 = !DILocation(line: 61, column: 15, scope: !180) | |||||
!183 = !DILocation(line: 61, column: 12, scope: !180) | |||||
!184 = !DILocation(line: 61, column: 5, scope: !180) | |||||
!185 = !DILocation(line: 61, column: 10, scope: !180) | |||||
!186 = !DILocation(line: 59, column: 23, scope: !176) | |||||
!187 = distinct !{!187, !177, !188, !42} | |||||
!188 = !DILocation(line: 62, column: 3, scope: !172) | |||||
; ---- @test_backwardVectorizableButPreventsForwarding: subprogram, scopes, locations ----
!189 = distinct !DISubprogram(name: "test_backwardVectorizableButPreventsForwarding", scope: !1, file: !1, line: 72, type: !190, scopeLine: 72, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !192) | |||||
!190 = !DISubroutineType(types: !191) | |||||
!191 = !{null, !16, !17} | |||||
!192 = !{} | |||||
!196 = distinct !DILexicalBlock(scope: !189, file: !1, line: 73, column: 3) | |||||
!200 = distinct !DILexicalBlock(scope: !196, file: !1, line: 73, column: 3) | |||||
!204 = distinct !DILexicalBlock(scope: !200, file: !1, line: 73, column: 29) | |||||
!207 = !DILocation(line: 74, column: 21, scope: !204) | |||||
!209 = !DILocation(line: 74, column: 5, scope: !204) | |||||
; ---- @test_unknown_dep: subprogram, scopes, locations ----
!214 = distinct !DISubprogram(name: "test_unknown_dep", scope: !1, file: !1, line: 80, type: !190, scopeLine: 80, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !215) | |||||
!215 = !{} | |||||
!219 = distinct !DILexicalBlock(scope: !214, file: !1, line: 81, column: 3) | |||||
!223 = distinct !DILexicalBlock(scope: !219, file: !1, line: 81, column: 3) | |||||
!227 = distinct !DILexicalBlock(scope: !223, file: !1, line: 81, column: 28) | |||||
!229 = !DILocation(line: 82, column: 7, scope: !227) | |||||
!231 = !DILocation(line: 83, column: 7, scope: !227) |
please avoid adding tests with -enable-new-pm=0. You should be able to move that one to the LoopAccessAnalysis directory.