Changeset View
Changeset View
Standalone View
Standalone View
llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll
- This file was added.
; RUN: opt -passes='loop(require<access-info>),function(loop-vectorize)' -disable-output -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s | |||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" | |||||
fhahn: For remarks, it might be good to check the full yaml generated, as in… | |||||
; // a) Dependence::NoDep | |||||
; // Loop containing only reads (here of the array A) does not hinder vectorization | |||||
; void test_nodep(int n, int* A, int* B) { | |||||
; for(int i = 1; i < n ; ++i) { | |||||
; B[i] = A[i-1] + A[i+2]; | |||||
; } | |||||
; } | |||||
; CHECK-NOT: remark: source.c:{{0-9]+}}:{{[0-9]+}}: | |||||
define void @test_nodep(i64 %n, i32* nocapture readonly %A, i32* nocapture %B) !dbg !44 { | |||||
entry: | |||||
%cmp12 = icmp sgt i64 %n, 1 | |||||
br i1 %cmp12, label %for.body, label %for.cond.cleanup | |||||
Not Done ReplyInline Actionsnit: check here not needed? fhahn: nit: check here not needed? | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
nit: easier to read if this is the last block fhahn: nit: easier to read if this is the last block | |||||
for.body: ; preds = %entry, %for.body | |||||
%indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ] | |||||
%0 = add nsw i64 %indvars.iv, -1 | |||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !61 | |||||
%1 = load i32, i32* %arrayidx, align 4, !dbg !61 | |||||
%2 = add nuw nsw i64 %indvars.iv, 2 | |||||
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %2, !dbg !63 | |||||
%3 = load i32, i32* %arrayidx2, align 4, !dbg !63 | |||||
%add3 = add nsw i32 %3, %1 | |||||
%arrayidx5 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv | |||||
store i32 %add3, i32* %arrayidx5, align 4 | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body | |||||
} | |||||
; // b) Dependence::Forward | |||||
; // Loop gets vectorized since it contains only a forward | |||||
; // dependency between A[i-2] and A[i] | |||||
; void test_forward(int n, int* A, int* B) { | |||||
; for(int i=1; i < n; ++i) { | |||||
; A[i] = 10; | |||||
; B[i] = A[i-2]; | |||||
; } | |||||
; } | |||||
; CHECK-NOT: remark: source.c:{{0-9]+}}:{{[0-9]+}}: | |||||
define dso_local void @test_forward(i64 %n, i32* nocapture %A, i32* nocapture %B) !dbg !70 { | |||||
entry: | |||||
%cmp11 = icmp sgt i64 %n, 1 | |||||
br i1 %cmp11, label %for.body, label %for.cond.cleanup, !dbg !81 | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
for.body: ; preds = %entry, %for.body | |||||
%indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ] | |||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !83 | |||||
store i32 10, i32* %arrayidx, align 4 | |||||
%0 = add nsw i64 %indvars.iv, -2 | |||||
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !87 | |||||
%1 = load i32, i32* %arrayidx2, align 4, !dbg !87 | |||||
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !88 | |||||
store i32 %1, i32* %arrayidx4, align 4, !dbg !89 | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !81 | |||||
} | |||||
; // c) Dependence::BackwardVectorizable | |||||
; // Loop gets vectorized since it contains a backward dependency | |||||
; // between A[i] and A[i-4], but the dependency distance (4) is | |||||
; // greater than the minimum possible VF (2 in this case) | |||||
; void test_backwardVectorizable(int n, int* A, int* B) { | |||||
; for(int i=4; i < n; ++i) { | |||||
; A[i] = A[i-4] + 1; | |||||
; } | |||||
; } | |||||
; CHECK-NOT: remark: source.c:{{0-9]+}}:{{[0-9]+}}: | |||||
define dso_local void @test_backwardVectorizable(i64 %n, i32* nocapture %A, i32* nocapture readnone %B) !dbg !93 { | |||||
entry: | |||||
%cmp8 = icmp sgt i64 %n, 4 | |||||
br i1 %cmp8, label %for.body, label %for.cond.cleanup | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
for.body: ; preds = %entry, %for.body | |||||
%indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ] | |||||
%0 = add nsw i64 %indvars.iv, -4, !dbg !106 | |||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !108 | |||||
%1 = load i32, i32* %arrayidx, align 4, !dbg !108 | |||||
%add = add nsw i32 %1, 1 | |||||
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !110 | |||||
store i32 %add, i32* %arrayidx2, align 4, !dbg !111 | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body | |||||
} | |||||
; // d) Dependence::Backward | |||||
; // Loop does not get vectorized since it contains a backward | |||||
; // dependency between A[i] and A[i+3]. | |||||
; void test_backward_dep(int n, double *A) { | |||||
; for (int i = 1; i <= n - 3; i += 3) { | |||||
; A[i] = A[i-1]; | |||||
; A[i+1] = A[i+3]; | |||||
; } | |||||
; } | |||||
; CHECK: remark: source.c:48:14: loop not vectorized: unsafe dependent memory operations in loop. | |||||
; CHECK-NEXT: Backward loop carried data dependence. Memory location is the same as accessed at source.c:47:5 | |||||
define void @test_backward_dep(i32 %n, double* nocapture %A) { | |||||
entry: | |||||
%cmp.not19 = icmp slt i32 %n, 4 | |||||
Not Done ReplyInline ActionsWhen I try this out with Clang, I see: Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loopBackward loop carried data dependence The issue here is the loopBackward (no period and space between remarks) sdesmalen: When I try this out with Clang, I see:
```Use #pragma loop distribute(enable) to allow loop… | |||||
I'm not very clear on the issue here .. they are already printed on separate lines ? Regardless I've updated the tests to print the entire message. malharJ: I'm not very clear on the issue here .. they are already printed on separate lines ? | |||||
br i1 %cmp.not19, label %for.cond.cleanup, label %for.body.preheader | |||||
Not Done ReplyInline ActionsIs this needed? Same for other tests. fhahn: Is this needed? Same for other tests. | |||||
Can you please explain why this is not needed ? malharJ: Can you please explain why this is not needed ? | |||||
for.body.preheader: ; preds = %entry | |||||
%sub = add nsw i32 %n, -3 | |||||
%0 = zext i32 %sub to i64 | |||||
br label %for.body | |||||
is this needed? fhahn: is this needed? | |||||
thanks. removed it now. changed the type of %n from i32 to i64. malharJ: thanks. removed it now. changed the type of %n from i32 to i64. | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
for.body: ; preds = %for.body.preheader, %for.body | |||||
%indvars.iv = phi i64 [ 1, %for.body.preheader ], [ %indvars.iv.next, %for.body ] | |||||
%1 = add nsw i64 %indvars.iv, -1 | |||||
%arrayidx = getelementptr inbounds double, double* %A, i64 %1 | |||||
%2 = load double, double* %arrayidx, align 8 | |||||
%arrayidx3 = getelementptr inbounds double, double* %A, i64 %indvars.iv, !dbg !157 | |||||
store double %2, double* %arrayidx3, align 8 | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 3 | |||||
%arrayidx5 = getelementptr inbounds double, double* %A, i64 %indvars.iv.next, !dbg !160 | |||||
%3 = load double, double* %arrayidx5, align 8, !dbg !160 | |||||
%4 = add nuw nsw i64 %indvars.iv, 1 | |||||
%arrayidx8 = getelementptr inbounds double, double* %A, i64 %4 | |||||
store double %3, double* %arrayidx8, align 8 | |||||
%cmp.not = icmp ugt i64 %indvars.iv.next, %0 | |||||
br i1 %cmp.not, label %for.cond.cleanup, label %for.body | |||||
} | |||||
; // e) Dependence::ForwardButPreventsForwarding | |||||
; // Loop does not get vectorized despite only having a forward | |||||
; // dependency between A[i] and A[i-3]. | |||||
; // This is because the store-to-load forwarding distance (here 3) | |||||
; // needs to be a multiple of vector factor otherwise the | |||||
; // store (A[5:6] in i=5) and load (A[4:5],A[6:7] in i=7,9) are unaligned. | |||||
; void test_forwardPreventsForwarding_dep(int* A, int* B, int n) { | |||||
; for(int i=3; i < n; ++i) { | |||||
; A[i] = 10; | |||||
; B[i] = A[i-3]; | |||||
; } | |||||
; } | |||||
; CHECK: remark: source.c:61:12: loop not vectorized: unsafe dependent memory operations in loop. | |||||
; CHECK-NEXT: Forward loop carried data dependence that prevents store-to-load forwarding. Memory location is the same as accessed at source.c:60:5 | |||||
define void @test_forwardPreventsForwarding_dep(i32* nocapture %A, i32* nocapture %B, i64 %n) !dbg !166 { | |||||
entry: | |||||
%cmp11 = icmp sgt i64 %n, 3 | |||||
br i1 %cmp11, label %for.body, label %for.cond.cleanup | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
for.body: ; preds = %entry, %for.body | |||||
%indvars.iv = phi i64 [ 3, %entry ], [ %indvars.iv.next, %for.body ] | |||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !179 | |||||
store i32 10, i32* %arrayidx, align 4 | |||||
%0 = add nsw i64 %indvars.iv, -3 | |||||
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !183 | |||||
%1 = load i32, i32* %arrayidx2, align 4, !dbg !183 | |||||
%arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv | |||||
store i32 %1, i32* %arrayidx4, align 4 | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body | |||||
} | |||||
; // f) Dependence::BackwardVectorizableButPreventsForwarding | |||||
; // Loop does not get vectorized despite having a backward | |||||
; // but vectorizable dependency between A[i] and A[i-15]. | |||||
; // | |||||
; // This is because the store-to-load forwarding distance (here 15) | |||||
; // needs to be a multiple of vector factor otherwise | |||||
; // store (A[16:17] in i=16) and load (A[15:16], A[17:18] in i=30,32) are unaligned. | |||||
; void test_backwardVectorizableButPreventsForwarding(int* A, int n) { | |||||
; for(int i=15; i < n; ++i) { | |||||
; A[i] = A[i-2] + A[i-15]; | |||||
; } | |||||
; } | |||||
; CHECK: remark: source.c:74:5: loop not vectorized: unsafe dependent memory operations in loop. | |||||
; CHECK: Backward loop carried data dependence that prevents store-to-load forwarding. Memory location is the same as accessed at source.c:74:21 | |||||
define void @test_backwardVectorizableButPreventsForwarding(i32* nocapture %A, i64 %n) !dbg !189 { | |||||
entry: | |||||
%cmp13 = icmp sgt i64 %n, 15 | |||||
br i1 %cmp13, label %for.body, label %for.cond.cleanup | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
for.body: ; preds = %entry, %for.body | |||||
%indvars.iv = phi i64 [ 15, %entry ], [ %indvars.iv.next, %for.body ] | |||||
%0 = add nsw i64 %indvars.iv, -2 | |||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %0 | |||||
%1 = load i32, i32* %arrayidx, align 4 | |||||
%2 = add nsw i64 %indvars.iv, -15 | |||||
%arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %2, !dbg !207 | |||||
%3 = load i32, i32* %arrayidx3, align 4 | |||||
%add = add nsw i32 %3, %1 | |||||
%arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !209 | |||||
store i32 %add, i32* %arrayidx5, align 4, !dbg !209 | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body | |||||
} | |||||
; // g) Dependence::Unknown | |||||
; // Different stride lengths | |||||
; void test_unknown_dep(int* A, int n) { | |||||
; for(int i=0; i < n; ++i) { | |||||
; A[(i+1)*4] = 10; | |||||
; A[i] = 100; | |||||
; } | |||||
; } | |||||
; CHECK: remark: source.c:83:7: loop not vectorized: unsafe dependent memory operations in loop. | |||||
; CHECK: Unknown data dependence. Memory location is the same as accessed at source.c:82:7 | |||||
define void @test_unknown_dep(i32* nocapture %A, i64 %n) !dbg !214 { | |||||
entry: | |||||
%cmp8 = icmp sgt i64 %n, 0 | |||||
br i1 %cmp8, label %for.body, label %for.cond.cleanup | |||||
for.cond.cleanup: ; preds = %for.body, %entry | |||||
ret void | |||||
for.body: ; preds = %entry, %for.body | |||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] | |||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 | |||||
%0 = shl nsw i64 %indvars.iv.next, 2 | |||||
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !229 | |||||
store i32 10, i32* %arrayidx, align 4 | |||||
%arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !231 | |||||
store i32 100, i32* %arrayidx2, align 4, !dbg !231 | |||||
%exitcond.not = icmp eq i64 %indvars.iv.next, %n | |||||
br i1 %exitcond.not, label %for.cond.cleanup, label %for.body | |||||
} | |||||
!llvm.dbg.cu = !{!0} | |||||
!llvm.module.flags = !{!4} | |||||
!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0 (https://github.com/llvm/llvm-project.git 54f0f826c5c7d0ff16c230b259cb6aad33e18d97)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) | |||||
!1 = !DIFile(filename: "source.c", directory: "") | |||||
!2 = !{} | |||||
!4 = !{i32 2, !"Debug Info Version", i32 3} | |||||
can the debug lock be trimmed down a bit? fhahn: can the debug lock be trimmed down a bit? | |||||
Done.
malharJ: Done.
- Changed functions to use same parameters (and same ordering).
This helped reduce ! | |||||
!13 = distinct !DISubprogram(name: "test_unknown_bounds", scope: !1, file: !1, line: 2, type: !14, scopeLine: 2, unit: !0, retainedNodes: !18) | |||||
!14 = !DISubroutineType(types: !15) | |||||
!15 = !{null, !16, !16, !17} | |||||
!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 64) | |||||
!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) | |||||
!18 = !{} | |||||
!23 = distinct !DILexicalBlock(scope: !13, file: !1, line: 3, column: 5) | |||||
!27 = distinct !DILexicalBlock(scope: !23, file: !1, line: 3, column: 5) | |||||
!28 = !DILocation(line: 3, column: 5, scope: !23) | |||||
!35 = !DILocation(line: 4, column: 16, scope: !27) | |||||
!44 = distinct !DISubprogram(name: "test_nodep", scope: !1, file: !1, line: 14, type: !45, scopeLine: 14, unit: !0, retainedNodes: !47) | |||||
!45 = !DISubroutineType(types: !46) | |||||
!46 = !{null, !17, !16, !16} | |||||
!47 = !{} | |||||
!52 = distinct !DILexicalBlock(scope: !44, file: !1, line: 15, column: 3) | |||||
!56 = distinct !DILexicalBlock(scope: !52, file: !1, line: 15, column: 3) | |||||
!60 = distinct !DILexicalBlock(scope: !56, file: !1, line: 15, column: 31) | |||||
!61 = !DILocation(line: 16, column: 12, scope: !60) | |||||
!63 = !DILocation(line: 16, column: 21, scope: !60) | |||||
!70 = distinct !DISubprogram(name: "test_forward", scope: !1, file: !1, line: 24, type: !45, scopeLine: 24, unit: !0, retainedNodes: !71) | |||||
!71 = !{} | |||||
!77 = distinct !DILexicalBlock(scope: !70, file: !1, line: 25, column: 3) | |||||
!80 = distinct !DILexicalBlock(scope: !77, file: !1, line: 25, column: 3) | |||||
!81 = !DILocation(line: 25, column: 3, scope: !77) | |||||
!83 = !DILocation(line: 26, column: 5, scope: !84) | |||||
!84 = distinct !DILexicalBlock(scope: !80, file: !1, line: 25, column: 28) | |||||
!87 = !DILocation(line: 27, column: 12, scope: !84) | |||||
!88 = !DILocation(line: 27, column: 5, scope: !84) | |||||
!89 = !DILocation(line: 27, column: 10, scope: !84) | |||||
!93 = distinct !DISubprogram(name: "test_backwardVectorizable", scope: !1, file: !1, line: 36, type: !45, scopeLine: 36, unit: !0, retainedNodes: !94) | |||||
!94 = !{} | |||||
!99 = distinct !DILexicalBlock(scope: !93, file: !1, line: 37, column: 3) | |||||
!103 = distinct !DILexicalBlock(scope: !99, file: !1, line: 37, column: 3) | |||||
!106 = !DILocation(line: 38, column: 15, scope: !107) | |||||
!107 = distinct !DILexicalBlock(scope: !103, file: !1, line: 37, column: 28) | |||||
!108 = !DILocation(line: 38, column: 12, scope: !107) | |||||
!110 = !DILocation(line: 38, column: 5, scope: !107) | |||||
!111 = !DILocation(line: 38, column: 10, scope: !107) | |||||
!136 = distinct !DISubprogram(name: "test_backward_dep", scope: !1, file: !1, line: 45, type: !137, scopeLine: 45, unit: !0, retainedNodes: !141) | |||||
!137 = !DISubroutineType(types: !138) | |||||
!138 = !{null, !17, !139} | |||||
!139 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !140, size: 64) | |||||
!140 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float) | |||||
!141 = !{} | |||||
!145 = distinct !DILexicalBlock(scope: !136, file: !1, line: 46, column: 3) | |||||
!149 = distinct !DILexicalBlock(scope: !145, file: !1, line: 46, column: 3) | |||||
!153 = distinct !DILexicalBlock(scope: !149, file: !1, line: 46, column: 39) | |||||
!157 = !DILocation(line: 47, column: 5, scope: !153) | |||||
!160 = !DILocation(line: 48, column: 14, scope: !153) | |||||
!166 = distinct !DISubprogram(name: "test_forwardPreventsForwarding_dep", scope: !1, file: !1, line: 58, type: !14, scopeLine: 58, unit: !0, retainedNodes: !167) | |||||
!167 = !{} | |||||
!172 = distinct !DILexicalBlock(scope: !166, file: !1, line: 59, column: 3) | |||||
!175 = !DILocation(line: 59, column: 18, scope: !176) | |||||
!176 = distinct !DILexicalBlock(scope: !172, file: !1, line: 59, column: 3) | |||||
!177 = !DILocation(line: 59, column: 3, scope: !172) | |||||
!178 = !DILocation(line: 63, column: 1, scope: !166) | |||||
!179 = !DILocation(line: 60, column: 5, scope: !180) | |||||
!180 = distinct !DILexicalBlock(scope: !176, file: !1, line: 59, column: 28) | |||||
!181 = !DILocation(line: 60, column: 10, scope: !180) | |||||
!182 = !DILocation(line: 61, column: 15, scope: !180) | |||||
!183 = !DILocation(line: 61, column: 12, scope: !180) | |||||
!184 = !DILocation(line: 61, column: 5, scope: !180) | |||||
!185 = !DILocation(line: 61, column: 10, scope: !180) | |||||
!186 = !DILocation(line: 59, column: 23, scope: !176) | |||||
!189 = distinct !DISubprogram(name: "test_backwardVectorizableButPreventsForwarding", scope: !1, file: !1, line: 72, type: !190, scopeLine: 72, unit: !0, retainedNodes: !192) | |||||
!190 = !DISubroutineType(types: !191) | |||||
!191 = !{null, !16, !17} | |||||
!192 = !{} | |||||
!196 = distinct !DILexicalBlock(scope: !189, file: !1, line: 73, column: 3) | |||||
!200 = distinct !DILexicalBlock(scope: !196, file: !1, line: 73, column: 3) | |||||
!204 = distinct !DILexicalBlock(scope: !200, file: !1, line: 73, column: 29) | |||||
!207 = !DILocation(line: 74, column: 21, scope: !204) | |||||
!209 = !DILocation(line: 74, column: 5, scope: !204) | |||||
!214 = distinct !DISubprogram(name: "test_unknown_dep", scope: !1, file: !1, line: 80, type: !190, scopeLine: 80, unit: !0, retainedNodes: !215) | |||||
!215 = !{} | |||||
!219 = distinct !DILexicalBlock(scope: !214, file: !1, line: 81, column: 3) | |||||
!223 = distinct !DILexicalBlock(scope: !219, file: !1, line: 81, column: 3) | |||||
!227 = distinct !DILexicalBlock(scope: !223, file: !1, line: 81, column: 28) | |||||
!229 = !DILocation(line: 82, column: 7, scope: !227) | |||||
!231 = !DILocation(line: 83, column: 7, scope: !227) |
For remarks, it might be good to check the full yaml generated, as in llvm/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll