diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp --- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -1523,11 +1523,11 @@ Type *BTy = BPtr->getType()->getPointerElementType(); auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout(); uint64_t TypeByteSize = DL.getTypeAllocSize(ATy); + bool SizesAreSame = TypeByteSize == DL.getTypeAllocSize(BTy); uint64_t Stride = std::abs(StrideAPtr); const SCEVConstant *C = dyn_cast(Dist); if (!C) { - if (!isa(Dist) && - TypeByteSize == DL.getTypeAllocSize(BTy) && + if (!isa(Dist) && SizesAreSame && isSafeDependenceDistance(DL, *(PSE.getSE()), *(PSE.getBackedgeTakenCount()), *Dist, Stride, TypeByteSize)) @@ -1542,7 +1542,7 @@ int64_t Distance = Val.getSExtValue(); // Attempt to prove strided accesses independent. - if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy && + if (std::abs(Distance) > 0 && Stride > 1 && SizesAreSame && areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) { LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n"); return Dependence::NoDep; @@ -1563,9 +1563,8 @@ } // Write to the same location with the same size. - // Could be improved to assert type sizes are the same (i32 == float, etc). 
if (Val == 0) { - if (ATy == BTy) + if (SizesAreSame) return Dependence::Forward; LLVM_DEBUG( dbgs() << "LAA: Zero dependence difference but different types\n"); @@ -1574,7 +1573,7 @@ assert(Val.isStrictlyPositive() && "Expect a positive value"); - if (ATy != BTy) { + if (!SizesAreSame) { LLVM_DEBUG( dbgs() << "LAA: ReadWrite-Write positive dependency with different types\n"); diff --git a/llvm/test/Transforms/LoopVectorize/depend_diff_types.ll b/llvm/test/Transforms/LoopVectorize/depend_diff_types.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/depend_diff_types.ll @@ -0,0 +1,81 @@ +; RUN: opt -S -tbaa -loop-accesses -analyze -enable-new-pm=0 < %s | FileCheck %s +; RUN: opt -S -disable-output -passes='require,require,require,loop(print-access-info)' < %s 2>&1 | FileCheck %s + + +; In the function below some of the accesses are done as +; double types and some are done as i64 types. When doing +; dependence analysis the type should not matter if it can +; be determined that they are the same size. 
This test was +; taken from a simple Fortran loop: +; +; subroutine foo(V1,V3,M,N) +; implicit none +; integer :: i +; integer, intent(in) :: M, N +; real(kind=8), intent(in) :: V1(16,N) +; real(kind=8), intent(inout) :: V3(16,N) +; do i = 1, M +; V3(i,3) = V1(i,120) * V3(i,65) +; V3(i,64) = V1(i,4) +; V3(i,1021) = V1(i,6) +; end do +; end subroutine + + +; CHECK: Memory dependences are safe with a maximum dependence distance +define void @foo_(i64* nocapture readonly %v1, i64* nocapture %v3, i64* nocapture readonly %m, i64* nocapture readnone %n) #0 { +L.entry: + %0 = bitcast i64* %m to i32* + %1 = load i32, i32* %0, align 4, !tbaa !0 + %2 = icmp slt i32 %1, 1 + br i1 %2, label %L.LB1_320, label %L.LB1_319.preheader + +L.LB1_319.preheader: ; preds = %L.entry + %3 = getelementptr i64, i64* %v3, i64 1023 + %4 = getelementptr i64, i64* %v1, i64 1903 + br label %L.LB1_319 + +L.LB1_319: ; preds = %L.LB1_319.preheader, %L.LB1_319 + %indvars.iv = phi i64 [ 1, %L.LB1_319.preheader ], [ %indvars.iv.next, %L.LB1_319 ] + %.dY0001_321.0 = phi i32 [ %1, %L.LB1_319.preheader ], [ %25, %L.LB1_319 ] + %5 = getelementptr i64, i64* %3, i64 %indvars.iv + %6 = bitcast i64* %5 to double* + %7 = load double, double* %6, align 8, !tbaa !4 + %8 = getelementptr i64, i64* %4, i64 %indvars.iv + %9 = bitcast i64* %8 to double* + %10 = load double, double* %9, align 8, !tbaa !6 + %11 = fmul fast double %10, %7 + %12 = add nsw i64 %indvars.iv, -992 + %13 = getelementptr i64, i64* %3, i64 %12 + %14 = bitcast i64* %13 to double* + store double %11, double* %14, align 8, !tbaa !4 + %15 = add nsw i64 %indvars.iv, -1856 + %16 = getelementptr i64, i64* %4, i64 %15 + %17 = load i64, i64* %16, align 8, !tbaa !6 + %18 = add nsw i64 %indvars.iv, -16 + %19 = getelementptr i64, i64* %3, i64 %18 + store i64 %17, i64* %19, align 8, !tbaa !4 + %20 = add nsw i64 %indvars.iv, -1824 + %21 = getelementptr i64, i64* %4, i64 %20 + %22 = load i64, i64* %21, align 8, !tbaa !6 + %23 = add nuw i64 %indvars.iv, 
15296 + %24 = getelementptr i64, i64* %3, i64 %23 + store i64 %22, i64* %24, align 8, !tbaa !4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %25 = add nsw i32 %.dY0001_321.0, -1 + %26 = icmp sgt i32 %.dY0001_321.0, 1 + br i1 %26, label %L.LB1_319, label %L.LB1_320 + +L.LB1_320: ; preds = %L.LB1_319, %L.entry + ret void + +} + +!0 = !{!1, !1, i64 0} +!1 = !{!"t1.6", !2, i64 0} +!2 = !{!"unlimited ptr", !3, i64 0} +!3 = !{!"Flang FAA 1"} +!4 = !{!5, !5, i64 0} +!5 = !{!"t1.a", !2, i64 0} +!6 = !{!7, !7, i64 0} +!7 = !{!"t1.e", !2, i64 0}