diff --git a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h --- a/llvm/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/llvm/include/llvm/Analysis/LoopAccessAnalysis.h @@ -605,6 +605,11 @@ /// invariant. void collectStridedAccess(Value *LoadOrStoreInst); + // Emits the first unsafe memory dependence in a loop. + // Emits nothing if there are no unsafe dependences + // or if the dependences were not recorded. + void emitUnsafeDependenceRemark(); + std::unique_ptr PSE; /// We need to check that all of the pointers in this list are disjoint diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -45,6 +45,7 @@ #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Operator.h" #include "llvm/IR/PassManager.h" #include "llvm/IR/Type.h" @@ -2129,14 +2130,64 @@ dbgs() << "LAA: No unsafe dependent memory operations in loop. We" << (PtrRtChecking->Need ? "" : " don't") << " need runtime memory checks.\n"); - else { - recordAnalysis("UnsafeMemDep") - << "unsafe dependent memory operations in loop. 
Use " - "#pragma loop distribute(enable) to allow loop distribution " - "to attempt to isolate the offending operations into a separate " - "loop"; - LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); + else + emitUnsafeDependenceRemark(); +} + +void LoopAccessInfo::emitUnsafeDependenceRemark() { + auto Deps = getDepChecker().getDependences(); + DebugLoc SourceLoc; + + if (!Deps) + return; + auto Found = std::find_if( + Deps->begin(), Deps->end(), [](const MemoryDepChecker::Dependence &D) { + return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) != + MemoryDepChecker::VectorizationSafetyStatus::Safe; + }); + if (Found == Deps->end()) + return; + MemoryDepChecker::Dependence Dep = *Found; + + LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); + + // Emit remark for first unsafe dependence + OptimizationRemarkAnalysis &R = + recordAnalysis("UnsafeDep", Dep.getDestination(*this)) + << "unsafe dependent memory operations in loop. 
Use " + "#pragma loop distribute(enable) to allow loop distribution " + "to attempt to isolate the offending operations into a separate " + "loop"; + + switch (Dep.Type) { + case MemoryDepChecker::Dependence::NoDep: + case MemoryDepChecker::Dependence::Forward: + case MemoryDepChecker::Dependence::BackwardVectorizable: + llvm_unreachable("Unexpected dependence"); + case MemoryDepChecker::Dependence::Backward: + R << "\nBackward loop carried data dependence."; + break; + case MemoryDepChecker::Dependence::ForwardButPreventsForwarding: + R << "\nForward loop carried data dependence that prevents " + "store-to-load forwarding."; + break; + case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding: + R << "\nBackward loop carried data dependence that prevents " + "store-to-load forwarding."; + break; + case MemoryDepChecker::Dependence::Unknown: + R << "\nUnknown data dependence."; + break; + } + + if (Instruction *I = Dep.getSource(*this)) { + SourceLoc = I->getDebugLoc(); + if (auto *DD = dyn_cast_or_null(getPointerOperand(I))) + SourceLoc = DD->getDebugLoc(); } + if (SourceLoc) + R << " Memory location is the same as accessed at " + << ore::NV("Location", SourceLoc); } bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll @@ -78,6 +78,7 @@ ; CHECK-LABEL: function 'backdep_type_store_size_equivalence': ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Unknown data dependence. 
; CHECK-NEXT: Dependences: ; CHECK-NEXT: Unknown: ; CHECK-NEXT: %ld.f32 = load float, float* %gep.iv.f32, align 8 -> @@ -120,6 +121,7 @@ ; CHECK-LABEL: function 'neg_dist_dep_type_size_equivalence': ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Unknown: ; CHECK-NEXT: %ld.i64 = load i64, i64* %gep.iv, align 8 -> diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll b/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/pointer-phis.ll @@ -127,6 +127,7 @@ ; CHECK-LABEL: 'load_with_pointer_phi_outside_loop' ; CHECK-NEXT: loop.header: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Unknown: ; CHECK-NEXT: %v8 = load double, double* %ptr, align 8 -> @@ -164,6 +165,7 @@ ; CHECK-LABEL: 'store_with_pointer_phi_outside_loop' ; CHECK-NEXT: loop.header: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Unknown: ; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 -> @@ -201,6 +203,7 @@ ; CHECK-LABEL: 'store_with_pointer_phi_incoming_phi' ; CHECK-NEXT: loop.header: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Unknown: ; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 -> @@ -277,6 +280,7 @@ ; CHECK-LABEL: 'store_with_pointer_phi_incoming_phi_irreducible_cycle' ; CHECK-NEXT: loop.header: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. 
Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Unknown: ; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 -> @@ -348,6 +352,7 @@ ; CHECK-LABEL: 'store_with_pointer_phi_outside_loop_select' ; CHECK-NEXT: loop.header: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Unknown: ; CHECK-NEXT: %v8 = load double, double* %arrayidx, align 8 -> @@ -413,6 +418,7 @@ ; CHECK-LABEL: Loop access info in function 'phi_load_store_memdep_check': ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Unknown data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Unknown: ; CHECK-NEXT: %lv3 = load i16, i16* %c.sink, align 2 -> diff --git a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/pointer-with-unknown-bounds.ll @@ -13,6 +13,7 @@ ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop ; CHECK-NOT: Report: cannot identify array bounds +; CHECK-NEXT: Unknown data dependence. 
; CHECK-NEXT: Dependences: ; CHECK-NEXT: Unknown: ; CHECK-NEXT: %loadA = load i16, i16* %arrayidxA, align 2 -> diff --git a/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll b/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/stride-access-dependence.ll @@ -118,6 +118,7 @@ ; CHECK: function 'unsafe_Read_Write': ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop +; CHECK-NEXT: Backward loop carried data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Backward: ; CHECK-NEXT: %0 = load i32, i32* %arrayidx, align 4 -> @@ -157,6 +158,7 @@ ; CHECK: function 'unsafe_Write_Read': ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop +; CHECK-NEXT: Backward loop carried data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Backward: ; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 -> @@ -193,6 +195,7 @@ ; CHECK: function 'unsafe_Write_Write': ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop +; CHECK-NEXT: Backward loop carried data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Backward: ; CHECK-NEXT: store i32 %0, i32* %arrayidx, align 4 -> @@ -346,6 +349,7 @@ ; CHECK: function 'vectorizable_unscaled_Read_Write': ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop +; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: BackwardVectorizableButPreventsForwarding: ; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 -> @@ -425,6 +429,7 @@ ; CHECK: function 'unsafe_unscaled_Read_Write': ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop +; CHECK-NEXT: Backward loop carried data dependence. 
; CHECK-NEXT: Dependences: ; CHECK-NEXT: Backward: ; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 -> @@ -455,6 +460,7 @@ ; CHECK: function 'unsafe_unscaled_Read_Write2': ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop +; CHECK-NEXT: Backward loop carried data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Backward: ; CHECK-NEXT: %2 = load i32, i32* %arrayidx, align 4 -> @@ -505,6 +511,7 @@ ; CHECK: function 'interleaved_stores': ; CHECK-NEXT: for.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop +; CHECK-NEXT: Backward loop carried data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Backward: ; CHECK-NEXT: store i32 %4, i32* %arrayidx5, align 4 -> diff --git a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/symbolic-stride.ll @@ -8,6 +8,7 @@ ; CHECK-LABEL: Loop access info in function 'single_stride': ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Backward loop carried data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Backward: ; CHECK-NEXT: %load = load i32, i32* %gep.A, align 4 -> @@ -51,6 +52,7 @@ ; CHECK-LABEL: Loop access info in function 'single_stride_struct': ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Backward loop carried data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Backward: ; CHECK-NEXT: %load = load { i32, i8 }, { i32, i8 }* %gep.A, align 4 -> @@ -97,6 +99,7 @@ ; CHECK-LABEL: Loop access info in function 'two_strides': ; CHECK-NEXT: loop: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Backward loop carried data dependence. 
; CHECK-NEXT: Dependences: ; CHECK-NEXT: Backward: ; CHECK-NEXT: %load = load i32, i32* %gep.A, align 4 -> diff --git a/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll b/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/underlying-objects-2.ll @@ -39,6 +39,7 @@ ; CHECK-LABEL: function 'f' ; CHECK: for_j.body: ; CHECK-NEXT: Report: unsafe dependent memory operations in loop +; CHECK-NEXT: Backward loop carried data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Backward: ; CHECK-NEXT: %loadB = load i8, i8* %gepB, align 1 -> diff --git a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll --- a/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll +++ b/llvm/test/Analysis/LoopAccessAnalysis/unsafe-and-rt-checks.ll @@ -8,6 +8,7 @@ target triple = "x86_64-apple-macosx10.10.0" ; CHECK: Report: unsafe dependent memory operations in loop +; CHECK-NEXT: Backward loop carried data dependence. ; CHECK-NEXT: Dependences: ; CHECK-NEXT: Backward: ; CHECK-NEXT: %loadA = load i16, i16* %arrayidxA, align 2 -> diff --git a/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll b/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll --- a/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll +++ b/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll @@ -22,9 +22,12 @@ ; 19 } ; 20 } -; CHECK: remark: /tmp/s.c:2:3: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop (hotness: 300) -; CHECK: remark: /tmp/s.c:9:3: loop not vectorized: unsafe dependent memory operations in loop. 
Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop (hotness: 5000) -; CHECK: remark: /tmp/s.c:16:3: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop{{$}} +; CHECK: remark: /tmp/s.c:3:14: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Backward loop carried data dependence. Memory location is the same as accessed at /tmp/s.c:3:16 (hotness: 300) +; CHECK: remark: /tmp/s.c:10:14: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Backward loop carried data dependence. Memory location is the same as accessed at /tmp/s.c:10:16 (hotness: 5000) +; CHECK: remark: /tmp/s.c:17:14: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Backward loop carried data dependence. 
Memory location is the same as accessed at /tmp/s.c:17:16{{$}}
 
 ; ModuleID = '/tmp/s.c'
 source_filename = "/tmp/s.c"
diff --git a/llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll b/llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/memory-dep-remarks.ll
@@ -0,0 +1,403 @@
+; RUN: opt -passes='loop(require<access-info>),function(loop-vectorize)' -disable-output -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s
+; RUN: opt < %s -passes='loop(require<access-info>),function(loop-vectorize)' -o /dev/null -pass-remarks-output=%t.yaml
+; RUN: cat %t.yaml | FileCheck -check-prefix=YAML %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+; // a) Dependence::NoDep
+; // Loop containing only reads (here of the array A) does not hinder vectorization
+; void test_nodep(int n, int* A, int* B) {
+;   for(int i = 1; i < n ; ++i) {
+;     B[i] = A[i-1] + A[i+2];
+;   }
+; }
+
+; CHECK-NOT: remark: source.c:{{[0-9]+}}:{{[0-9]+}}:
+
+define void @test_nodep(i64 %n, i32* nocapture readonly %A, i32* nocapture %B) !dbg !44 {
+entry:
+  %cmp12 = icmp sgt i64 %n, 1
+  br i1 %cmp12, label %for.body, label %for.cond.cleanup
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
+  %0 = add nsw i64 %indvars.iv, -1
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !61
+  %1 = load i32, i32* %arrayidx, align 4, !dbg !61
+  %2 = add nuw nsw i64 %indvars.iv, 2
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %2, !dbg !63
+  %3 = load i32, i32* %arrayidx2, align 4, !dbg !63
+  %add3 = add nsw i32 %3, %1
+  %arrayidx5 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+  store i32 %add3, i32* %arrayidx5, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:                                 ; preds = %for.body, %entry
+  ret void
+}
+
+
+; // b) Dependence::Forward
+; // Loop gets vectorized since it contains only a forward
+; // dependency between A[i-2] and A[i]
+; void test_forward(int n, int* A, int* B) {
+;   for(int i=1; i < n; ++i) {
+;     A[i] = 10;
+;     B[i] = A[i-2];
+;   }
+; }
+
+; CHECK-NOT: remark: source.c:{{[0-9]+}}:{{[0-9]+}}:
+define dso_local void @test_forward(i64 %n, i32* nocapture %A, i32* nocapture %B) !dbg !70 {
+entry:
+  %cmp11 = icmp sgt i64 %n, 1
+  br i1 %cmp11, label %for.body, label %for.cond.cleanup, !dbg !81
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !83
+  store i32 10, i32* %arrayidx, align 4
+  %0 = add nsw i64 %indvars.iv, -2
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !87
+  %1 = load i32, i32* %arrayidx2, align 4, !dbg !87
+  %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv, !dbg !88
+  store i32 %1, i32* %arrayidx4, align 4, !dbg !89
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !dbg !81
+
+ for.cond.cleanup:                                ; preds = %for.body, %entry
+  ret void
+}
+
+
+; // c) Dependence::BackwardVectorizable
+; // Loop gets vectorized since it contains a backward dependency
+; // between A[i] and A[i-4], but the dependency distance (4) is
+; // greater than the minimum possible VF (2 in this case)
+; void test_backwardVectorizable(int n, int* A) {
+;   for(int i=4; i < n; ++i) {
+;     A[i] = A[i-4] + 1;
+;   }
+; }
+
+; CHECK-NOT: remark: source.c:{{[0-9]+}}:{{[0-9]+}}:
+
+define dso_local void @test_backwardVectorizable(i64 %n, i32* nocapture %A) !dbg !93 {
+entry:
+  %cmp8 = icmp sgt i64 %n, 4
+  br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 4, %entry ], [ %indvars.iv.next, %for.body ]
+  %0 = add nsw i64
%indvars.iv, -4, !dbg !106 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !108 + %1 = load i32, i32* %arrayidx, align 4, !dbg !108 + %add = add nsw i32 %1, 1 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !110 + store i32 %add, i32* %arrayidx2, align 4, !dbg !111 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + ret void +} + +; // d) Dependence::Backward +; // Loop does not get vectorized since it contains a backward +; // dependency between A[i] and A[i+3]. +; void test_backward_dep(int n, int *A) { +; for (int i = 1; i <= n - 3; i += 3) { +; A[i] = A[i-1]; +; A[i+1] = A[i+3]; +; } +; } + +; CHECK: remark: source.c:48:14: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Backward loop carried data dependence. 
Memory location is the same as accessed at source.c:47:5 + +define void @test_backward_dep(i64 %n, i32* nocapture %A) { +entry: + %cmp.not19 = icmp slt i64 %n, 4 + br i1 %cmp.not19, label %for.cond.cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %sub = add nsw i64 %n, -3 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 1, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %0 = add nsw i64 %indvars.iv, -1 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0 + %1 = load i32, i32* %arrayidx, align 8 + %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !157 + store i32 %1, i32* %arrayidx3, align 8 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 3 + %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next, !dbg !160 + %2 = load i32, i32* %arrayidx5, align 8, !dbg !160 + %3 = add nuw nsw i64 %indvars.iv, 1 + %arrayidx8 = getelementptr inbounds i32, i32* %A, i64 %3 + store i32 %2, i32* %arrayidx8, align 8 + %cmp.not = icmp ugt i64 %indvars.iv.next, %n + br i1 %cmp.not, label %for.cond.cleanup, label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + ret void +} + +; // e) Dependence::ForwardButPreventsForwarding +; // Loop does not get vectorized despite only having a forward +; // dependency between A[i] and A[i-3]. +; // This is because the store-to-load forwarding distance (here 3) +; // needs to be a multiple of vector factor otherwise the +; // store (A[5:6] in i=5) and load (A[4:5],A[6:7] in i=7,9) are unaligned. +; void test_forwardButPreventsForwarding_dep(int n, int* A, int* B) { +; for(int i=3; i < n; ++i) { +; A[i] = 10; +; B[i] = A[i-3]; +; } +; } + +; CHECK: remark: source.c:61:12: loop not vectorized: unsafe dependent memory operations in loop. 
Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK-NEXT: Forward loop carried data dependence that prevents store-to-load forwarding. Memory location is the same as accessed at source.c:60:5 + +define void @test_forwardButPreventsForwarding_dep(i64 %n, i32* nocapture %A, i32* nocapture %B) !dbg !166 { +entry: + %cmp11 = icmp sgt i64 %n, 3 + br i1 %cmp11, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 3, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !179 + store i32 10, i32* %arrayidx, align 4 + %0 = add nsw i64 %indvars.iv, -3 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !183 + %1 = load i32, i32* %arrayidx2, align 4, !dbg !183 + %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + store i32 %1, i32* %arrayidx4, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + ret void +} + +; // f) Dependence::BackwardVectorizableButPreventsForwarding +; // Loop does not get vectorized despite having a backward +; // but vectorizable dependency between A[i] and A[i-15]. +; // +; // This is because the store-to-load forwarding distance (here 15) +; // needs to be a multiple of vector factor otherwise +; // store (A[16:17] in i=16) and load (A[15:16], A[17:18] in i=30,32) are unaligned. +; void test_backwardVectorizableButPreventsForwarding(int n, int* A) { +; for(int i=15; i < n; ++i) { +; A[i] = A[i-2] + A[i-15]; +; } +; } + +; CHECK: remark: source.c:74:5: loop not vectorized: unsafe dependent memory operations in loop. 
Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Backward loop carried data dependence that prevents store-to-load forwarding. Memory location is the same as accessed at source.c:74:21
+
+define void @test_backwardVectorizableButPreventsForwarding(i64 %n, i32* nocapture %A) !dbg !189 {
+entry:
+  %cmp13 = icmp sgt i64 %n, 15
+  br i1 %cmp13, label %for.body, label %for.cond.cleanup
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ 15, %entry ], [ %indvars.iv.next, %for.body ]
+  %0 = add nsw i64 %indvars.iv, -2
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0
+  %1 = load i32, i32* %arrayidx, align 4
+  %2 = add nsw i64 %indvars.iv, -15
+  %arrayidx3 = getelementptr inbounds i32, i32* %A, i64 %2, !dbg !207
+  %3 = load i32, i32* %arrayidx3, align 4
+  %add = add nsw i32 %3, %1
+  %arrayidx5 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !209
+  store i32 %add, i32* %arrayidx5, align 4, !dbg !209
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %n
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+ for.cond.cleanup:                                ; preds = %for.body, %entry
+  ret void
+}
+
+; // g) Dependence::Unknown
+; // Different stride lengths
+; void test_unknown_dep(int n, int* A) {
+;   for(int i=0; i < n; ++i) {
+;     A[(i+1)*4] = 10;
+;     A[i] = 100;
+;   }
+; }
+
+; CHECK: remark: source.c:83:7: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+; CHECK-NEXT: Unknown data dependence.
Memory location is the same as accessed at source.c:82:7 + +define void @test_unknown_dep(i64 %n, i32* nocapture %A) !dbg !214 { +entry: + %cmp8 = icmp sgt i64 %n, 0 + br i1 %cmp8, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %0 = shl nsw i64 %indvars.iv.next, 2 + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %0, !dbg !229 + store i32 10, i32* %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !231 + store i32 100, i32* %arrayidx2, align 4, !dbg !231 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + ret void +} + +; YAML: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: UnsafeDep +; YAML-NEXT: DebugLoc: { File: source.c, Line: 48, Column: 14 } +; YAML-NEXT: Function: test_backward_dep +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: 'unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop' +; YAML-NEXT: - String: "\nBackward loop carried data dependence." +; YAML-NEXT: - String: ' Memory location is the same as accessed at ' +; YAML-NEXT: - Location: 'source.c:47:5' +; YAML-NEXT: DebugLoc: { File: source.c, Line: 47, Column: 5 } +; YAML-NEXT: ... +; YAML-NEXT: --- !Missed +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: MissedDetails +; YAML-NEXT: Function: test_backward_dep +; YAML-NEXT: Args: +; YAML-NEXT: - String: loop not vectorized +; YAML-NEXT: ... 
+; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: UnsafeDep +; YAML-NEXT: DebugLoc: { File: source.c, Line: 61, Column: 12 } +; YAML-NEXT: Function: test_forwardButPreventsForwarding_dep +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: 'unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop' +; YAML-NEXT: - String: "\nForward loop carried data dependence that prevents store-to-load forwarding." +; YAML-NEXT: - String: ' Memory location is the same as accessed at ' +; YAML-NEXT: - Location: 'source.c:60:5' +; YAML-NEXT: DebugLoc: { File: source.c, Line: 60, Column: 5 } +; YAML-NEXT: ... +; YAML-NEXT: --- !Missed +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: MissedDetails +; YAML-NEXT: Function: test_forwardButPreventsForwarding_dep +; YAML-NEXT: Args: +; YAML-NEXT: - String: loop not vectorized +; YAML-NEXT: ... +; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: UnsafeDep +; YAML-NEXT: DebugLoc: { File: source.c, Line: 74, Column: 5 } +; YAML-NEXT: Function: test_backwardVectorizableButPreventsForwarding +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: 'unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop' +; YAML-NEXT: - String: "\nBackward loop carried data dependence that prevents store-to-load forwarding." +; YAML-NEXT: - String: ' Memory location is the same as accessed at ' +; YAML-NEXT: - Location: 'source.c:74:21' +; YAML-NEXT: DebugLoc: { File: source.c, Line: 74, Column: 21 } +; YAML-NEXT: ... 
+; YAML-NEXT: --- !Missed +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: MissedDetails +; YAML-NEXT: Function: test_backwardVectorizableButPreventsForwarding +; YAML-NEXT: Args: +; YAML-NEXT: - String: loop not vectorized +; YAML-NEXT: ... +; YAML-NEXT: --- !Analysis +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: UnsafeDep +; YAML-NEXT: DebugLoc: { File: source.c, Line: 83, Column: 7 } +; YAML-NEXT: Function: test_unknown_dep +; YAML-NEXT: Args: +; YAML-NEXT: - String: 'loop not vectorized: ' +; YAML-NEXT: - String: 'unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop' +; YAML-NEXT: - String: "\nUnknown data dependence." +; YAML-NEXT: - String: ' Memory location is the same as accessed at ' +; YAML-NEXT: - Location: 'source.c:82:7' +; YAML-NEXT: DebugLoc: { File: source.c, Line: 82, Column: 7 } +; YAML-NEXT: ... +; YAML-NEXT: --- !Missed +; YAML-NEXT: Pass: loop-vectorize +; YAML-NEXT: Name: MissedDetails +; YAML-NEXT: Function: test_unknown_dep +; YAML-NEXT: Args: +; YAML-NEXT: - String: loop not vectorized +; YAML-NEXT: ... 
+ + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!4} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 14.0.0 (https://github.com/llvm/llvm-project.git 54f0f826c5c7d0ff16c230b259cb6aad33e18d97)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) +!1 = !DIFile(filename: "source.c", directory: "") +!2 = !{} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!44 = distinct !DISubprogram(name: "test_nodep", scope: !1, file: !1, line: 14, type: !45, scopeLine: 14, unit: !0, retainedNodes: !2) +!45 = !DISubroutineType(types: !46) +!46 = !{null, !18, !16, !16} +!16 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !17, size: 64) +!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!18 = !DIBasicType(name: "int", size: 64, encoding: DW_ATE_signed) +!52 = distinct !DILexicalBlock(scope: !44, file: !1, line: 15, column: 3) +!56 = distinct !DILexicalBlock(scope: !52, file: !1, line: 15, column: 3) +!60 = distinct !DILexicalBlock(scope: !56, file: !1, line: 15, column: 31) +!61 = !DILocation(line: 16, column: 12, scope: !60) +!63 = !DILocation(line: 16, column: 21, scope: !60) +!70 = distinct !DISubprogram(name: "test_forward", scope: !1, file: !1, line: 24, type: !45, scopeLine: 24, unit: !0, retainedNodes: !2) +!77 = distinct !DILexicalBlock(scope: !70, file: !1, line: 25, column: 3) +!80 = distinct !DILexicalBlock(scope: !77, file: !1, line: 25, column: 3) +!81 = !DILocation(line: 25, column: 3, scope: !77) +!83 = !DILocation(line: 26, column: 5, scope: !84) +!84 = distinct !DILexicalBlock(scope: !80, file: !1, line: 25, column: 28) +!87 = !DILocation(line: 27, column: 12, scope: !84) +!88 = !DILocation(line: 27, column: 5, scope: !84) +!89 = !DILocation(line: 27, column: 10, scope: !84) +!93 = distinct !DISubprogram(name: "test_backwardVectorizable", scope: !1, file: !1, line: 36, type: !95, scopeLine: 36, unit: !0, retainedNodes: !2) +!95 = 
!DISubroutineType(types: !96) +!96 = !{null, !18, !16} +!99 = distinct !DILexicalBlock(scope: !93, file: !1, line: 37, column: 3) +!103 = distinct !DILexicalBlock(scope: !99, file: !1, line: 37, column: 3) +!106 = !DILocation(line: 38, column: 15, scope: !107) +!107 = distinct !DILexicalBlock(scope: !103, file: !1, line: 37, column: 28) +!108 = !DILocation(line: 38, column: 12, scope: !107) +!110 = !DILocation(line: 38, column: 5, scope: !107) +!111 = !DILocation(line: 38, column: 10, scope: !107) +!136 = distinct !DISubprogram(name: "test_backward_dep", scope: !1, file: !1, line: 45, type: !95, scopeLine: 45, unit: !0, retainedNodes: !2) +!145 = distinct !DILexicalBlock(scope: !136, file: !1, line: 46, column: 3) +!149 = distinct !DILexicalBlock(scope: !145, file: !1, line: 46, column: 3) +!153 = distinct !DILexicalBlock(scope: !149, file: !1, line: 46, column: 39) +!157 = !DILocation(line: 47, column: 5, scope: !153) +!160 = !DILocation(line: 48, column: 14, scope: !153) +!166 = distinct !DISubprogram(name: "test_forwardButPreventsForwarding_dep", scope: !1, file: !1, line: 58, type: !45, scopeLine: 58, unit: !0, retainedNodes: !2) +!172 = distinct !DILexicalBlock(scope: !166, file: !1, line: 59, column: 3) +!176 = distinct !DILexicalBlock(scope: !172, file: !1, line: 59, column: 3) +!179 = !DILocation(line: 60, column: 5, scope: !180) +!180 = distinct !DILexicalBlock(scope: !176, file: !1, line: 59, column: 28) +!183 = !DILocation(line: 61, column: 12, scope: !180) +!189 = distinct !DISubprogram(name: "test_backwardVectorizableButPreventsForwarding", scope: !1, file: !1, line: 72, type: !95, scopeLine: 72, unit: !0, retainedNodes: !2) +!196 = distinct !DILexicalBlock(scope: !189, file: !1, line: 73, column: 3) +!200 = distinct !DILexicalBlock(scope: !196, file: !1, line: 73, column: 3) +!204 = distinct !DILexicalBlock(scope: !200, file: !1, line: 73, column: 29) +!207 = !DILocation(line: 74, column: 21, scope: !204) +!209 = !DILocation(line: 74, column: 5, 
scope: !204) +!214 = distinct !DISubprogram(name: "test_unknown_dep", scope: !1, file: !1, line: 80, type: !95, scopeLine: 80, unit: !0, retainedNodes: !2) +!219 = distinct !DILexicalBlock(scope: !214, file: !1, line: 81, column: 3) +!223 = distinct !DILexicalBlock(scope: !219, file: !1, line: 81, column: 3) +!227 = distinct !DILexicalBlock(scope: !223, file: !1, line: 81, column: 28) +!229 = !DILocation(line: 82, column: 7, scope: !227) +!231 = !DILocation(line: 83, column: 7, scope: !227) diff --git a/llvm/test/Transforms/LoopVectorize/unsafe-dep-remark.ll b/llvm/test/Transforms/LoopVectorize/unsafe-dep-remark.ll --- a/llvm/test/Transforms/LoopVectorize/unsafe-dep-remark.ll +++ b/llvm/test/Transforms/LoopVectorize/unsafe-dep-remark.ll @@ -11,7 +11,7 @@ ; 5 } ; 6 } -; CHECK: remark: /tmp/kk.c:2:3: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop +; CHECK: remark: /tmp/kk.c:3:14: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop define void @success(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) !dbg !6 { entry: