diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -1568,11 +1568,12 @@
   auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
   uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
+  bool HasSameSize =
+      DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
   uint64_t Stride = std::abs(StrideAPtr);
 
   const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
   if (!C) {
-    if (!isa<SCEVCouldNotCompute>(Dist) &&
-        TypeByteSize == DL.getTypeAllocSize(BTy) &&
+    if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
         isSafeDependenceDistance(DL, *(PSE.getSE()),
                                  *(PSE.getBackedgeTakenCount()), *Dist, Stride,
                                  TypeByteSize))
@@ -1587,7 +1588,7 @@
   int64_t Distance = Val.getSExtValue();
 
   // Attempt to prove strided accesses independent.
-  if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
+  if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
       areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
     LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
     return Dependence::NoDep;
@@ -1598,7 +1599,7 @@
     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
     if (IsTrueDataDependence && EnableForwardingConflictDetection &&
         (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
-         ATy != BTy)) {
+         !HasSameSize)) {
       LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
       return Dependence::ForwardButPreventsForwarding;
     }
@@ -1608,21 +1609,19 @@
   }
 
   // Write to the same location with the same size.
-  // Could be improved to assert type sizes are the same (i32 == float, etc).
   if (Val == 0) {
-    if (ATy == BTy)
+    if (HasSameSize)
       return Dependence::Forward;
     LLVM_DEBUG(
-        dbgs() << "LAA: Zero dependence difference but different types\n");
+        dbgs() << "LAA: Zero dependence difference but different type sizes\n");
     return Dependence::Unknown;
   }
 
   assert(Val.isStrictlyPositive() && "Expect a positive value");
 
-  if (ATy != BTy) {
-    LLVM_DEBUG(
-        dbgs()
-        << "LAA: ReadWrite-Write positive dependency with different types\n");
+  if (!HasSameSize) {
+    LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
+                         "different type sizes\n");
     return Dependence::Unknown;
   }
 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Analysis/LoopAccessAnalysis/depend_diff_types.ll
@@ -0,0 +1,185 @@
; RUN: opt -S -disable-output -passes='require<scalar-evolution>,require<aa>,loop(print-access-info)' < %s 2>&1 | FileCheck %s


; In the function below some of the accesses are done as float types and some
; are done as i32 types. When doing dependence analysis, the types should not
; matter if it can be determined that they have the same size.
+ +%int_pair = type { i32, i32 } + +; CHECK-LABEL: function 'backdep_type_size_equivalence': +; CHECK-NEXT: loop: +; CHECK-NEXT: Memory dependences are safe with a maximum dependence distance of 800 bytes +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Forward: +; CHECK-NEXT: %ld.f32 = load float, float* %gep.iv.f32, align 8 -> +; CHECK-NEXT: store i32 %indvars.iv.i32, i32* %gep.iv, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: Forward: +; CHECK-NEXT: %ld.f32 = load float, float* %gep.iv.f32, align 8 -> +; CHECK-NEXT: store float %val, float* %gep.iv.min.100.f32, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: BackwardVectorizable: +; CHECK-NEXT: store float %val, float* %gep.iv.min.100.f32, align 8 -> +; CHECK-NEXT: store i32 %indvars.iv.i32, i32* %gep.iv, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: + +define void @backdep_type_size_equivalence(%int_pair* nocapture %vec, i64 %n) { +entry: + br label %loop + +loop: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ] + + ;; Load from vec[indvars.iv].x as float + %gep.iv = getelementptr inbounds %int_pair, %int_pair* %vec, i64 %indvars.iv, i32 0 + %gep.iv.f32 = bitcast i32* %gep.iv to float* + %ld.f32 = load float, float* %gep.iv.f32, align 8 + %val = fmul fast float %ld.f32, 5.0 + + ;; Store to vec[indvars.iv - 100].x as float + %indvars.iv.min.100 = add nsw i64 %indvars.iv, -100 + %gep.iv.min.100 = getelementptr inbounds %int_pair, %int_pair* %vec, i64 %indvars.iv.min.100, i32 0 + %gep.iv.min.100.f32 = bitcast i32* %gep.iv.min.100 to float* + store float %val, float* %gep.iv.min.100.f32, align 8 + + ;; Store to vec[indvars.iv].x as i32, creating a backward dependency between + ;; the two stores with different element types but the same element size. + %indvars.iv.i32 = trunc i64 %indvars.iv to i32 + store i32 %indvars.iv.i32, i32* %gep.iv, align 8 + + ;; Store to vec[indvars.iv].y as i32, strided accesses should be independent + ;; between the two stores with different element types but the same element size. + %gep.iv.1 = getelementptr inbounds %int_pair, %int_pair* %vec, i64 %indvars.iv, i32 1 + store i32 %indvars.iv.i32, i32* %gep.iv.1, align 8 + + ;; Store to vec[indvars.iv + n].y as i32, to verify no dependence in the case + ;; of unknown dependence distance. + %indvars.iv.n = add nuw nsw i64 %indvars.iv, %n + %gep.iv.n = getelementptr inbounds %int_pair, %int_pair* %vec, i64 %indvars.iv.n, i32 1 + store i32 %indvars.iv.i32, i32* %gep.iv.n, align 8 + + ;; Loop condition. + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %cond = icmp eq i64 %indvars.iv.next, %n + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +; In the function below one of the accesses is done as i19 type, which has a +; different store size than the i32 type, even though their alloc sizes are +; equivalent. This is a negative test to ensure that they are not analyzed as +; in the tests above. +; +; CHECK-LABEL: function 'backdep_type_store_size_equivalence': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. 
+; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.f32 = load float, float* %gep.iv.f32, align 8 -> +; CHECK-NEXT: store i19 %indvars.iv.i19, i19* %gep.iv.i19, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: + +define void @backdep_type_store_size_equivalence(%int_pair* nocapture %vec, i64 %n) { +entry: + br label %loop + +loop: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ] + + ;; Load from vec[indvars.iv].x as float + %gep.iv = getelementptr inbounds %int_pair, %int_pair* %vec, i64 %indvars.iv, i32 0 + %gep.iv.f32 = bitcast i32* %gep.iv to float* + %ld.f32 = load float, float* %gep.iv.f32, align 8 + %val = fmul fast float %ld.f32, 5.0 + + ;; Store to vec[indvars.iv].x as i19. + %indvars.iv.i19 = trunc i64 %indvars.iv to i19 + %gep.iv.i19 = bitcast i32* %gep.iv to i19* + store i19 %indvars.iv.i19, i19* %gep.iv.i19, align 8 + + ;; Loop condition. + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %cond = icmp eq i64 %indvars.iv.next, %n + br i1 %cond, label %exit, label %loop + +exit: + ret void +} + +; In the function below some of the accesses are done as double types and some +; are done as i64 and i32 types. This is a negative test to ensure that they +; are not analyzed as in the tests above. + +; CHECK-LABEL: function 'neg_dist_dep_type_size_equivalence': +; CHECK-NEXT: loop: +; CHECK-NEXT: Report: unsafe dependent memory operations in loop. +; CHECK-NEXT: Dependences: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.i64 = load i64, i64* %gep.iv, align 8 -> +; CHECK-NEXT: store i32 %ld.i64.i32, i32* %gep.iv.n.i32, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: ForwardButPreventsForwarding: +; CHECK-NEXT: store double %val, double* %gep.iv.101.f64, align 8 -> +; CHECK-NEXT: %ld.i64 = load i64, i64* %gep.iv, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: %ld.f64 = load double, double* %gep.iv.f64, align 8 -> +; CHECK-NEXT: store i32 %ld.i64.i32, i32* %gep.iv.n.i32, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: BackwardVectorizableButPreventsForwarding: +; CHECK-NEXT: %ld.f64 = load double, double* %gep.iv.f64, align 8 -> +; CHECK-NEXT: store double %val, double* %gep.iv.101.f64, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: Unknown: +; CHECK-NEXT: store double %val, double* %gep.iv.101.f64, align 8 -> +; CHECK-NEXT: store i32 %ld.i64.i32, i32* %gep.iv.n.i32, align 8 +; CHECK-EMPTY: +; CHECK-NEXT: Run-time memory checks: +; CHECK-NEXT: Grouped accesses: + +define void @neg_dist_dep_type_size_equivalence(i64* nocapture %vec, i64 %n) { +entry: + br label %loop + +loop: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %loop ] + + ;; Load from vec[indvars.iv] as double + %gep.iv = getelementptr i64, i64* %vec, i64 %indvars.iv + %gep.iv.f64 = bitcast i64* %gep.iv to double* + %ld.f64 = load double, double* %gep.iv.f64, align 8 + %val = fmul fast double %ld.f64, 5.0 + + ;; Store to vec[indvars.iv + 101] as double + %indvars.iv.101 = add nsw i64 %indvars.iv, 101 + %gep.iv.101.i64 = getelementptr i64, i64* %vec, i64 %indvars.iv.101 + %gep.iv.101.f64 = bitcast i64* %gep.iv.101.i64 to double* + store double %val, double* %gep.iv.101.f64, align 8 + + ;; Read from vec[indvars.iv] as i64 creating + ;; a forward but prevents forwarding dependence + ;; with different types but same sizes. 
  %ld.i64 = load i64, i64* %gep.iv, align 8

  ;; Store to vec[indvars.iv + n] as i32, i.e. with a different access size.
  %indvars.iv.n = add nuw nsw i64 %indvars.iv, %n
  %gep.iv.n.i64 = getelementptr inbounds i64, i64* %vec, i64 %indvars.iv.n
  %gep.iv.n.i32 = bitcast i64* %gep.iv.n.i64 to i32*
  %ld.i64.i32 = trunc i64 %ld.i64 to i32
  store i32 %ld.i64.i32, i32* %gep.iv.n.i32, align 8

  ;; Loop condition.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %cond = icmp eq i64 %indvars.iv.next, %n
  br i1 %cond, label %exit, label %loop

exit:
  ret void
}
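
Editorial note (not part of the patch): the new HasSameSize flag compares DataLayout store sizes, so accesses made through different IR types are still analyzed as long as they read or write the same number of bits, while the i19/i32 pair stays unanalyzable because the store sizes differ even though the alloc sizes match. The standalone C++ sketch below prints those quantities for the element types used in the tests above. The data layout string and the small driver are illustrative assumptions only; DataLayout::getTypeStoreSizeInBits and DataLayout::getTypeAllocSize are the same APIs the patch uses.

// Build against LLVM, e.g.:
//   clang++ sketch.cpp `llvm-config --cxxflags --ldflags --libs core support`
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Illustrative 64-bit data layout; any layout with natural integer
  // alignment yields the same sizes for these types.
  DataLayout DL("e-m:e-i64:64-f80:128-n8:16:32:64-S128");

  Type *Types[] = {Type::getFloatTy(Ctx), Type::getInt32Ty(Ctx),
                   Type::getIntNTy(Ctx, 19)};
  for (Type *Ty : Types) {
    // Store size is what the new HasSameSize check compares; alloc size is
    // the in-memory footprint rounded up to the ABI alignment.
    uint64_t StoreBits = DL.getTypeStoreSizeInBits(Ty).getFixedValue();
    uint64_t AllocBytes = DL.getTypeAllocSize(Ty).getFixedValue();
    Ty->print(outs());
    outs() << ": store size = " << StoreBits << " bits, alloc size = "
           << AllocBytes << " bytes\n";
  }
  // Prints:
  //   float: store size = 32 bits, alloc size = 4 bytes
  //   i32: store size = 32 bits, alloc size = 4 bytes
  //   i19: store size = 24 bits, alloc size = 4 bytes
  return 0;
}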