Index: llvm/include/llvm/Analysis/DependenceAnalysis.h =================================================================== --- llvm/include/llvm/Analysis/DependenceAnalysis.h +++ llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -282,9 +282,8 @@ /// depends - Tests for a dependence between the Src and Dst instructions. /// Returns NULL if no dependence; otherwise, returns a Dependence (or a /// FullDependence) with as much information as can be gleaned. - /// The flag PossiblyLoopIndependent should be set by the caller - /// if it appears that control flow can reach from Src to Dst - /// without traversing a loop back edge. + /// If PossiblyLoopIndependant is true will also test for intra-iteration + /// dependencies. std::unique_ptr depends(Instruction *Src, Instruction *Dst, bool PossiblyLoopIndependent); Index: llvm/lib/Analysis/DependenceAnalysis.cpp =================================================================== --- llvm/lib/Analysis/DependenceAnalysis.cpp +++ llvm/lib/Analysis/DependenceAnalysis.cpp @@ -3491,9 +3491,17 @@ Inv.invalidate(F, PA); } + /// depends - Tests for a dependence between the Src and Dst instructions. + /// Returns NULL if no dependence; otherwise, returns a Dependence (or a + /// FullDependence) with as much information as can be gleaned. + /// If PossiblyLoopIndependant is true will also test for intra-iteration + /// dependencies. + // depends - // Returns NULL if there is no dependence. // Otherwise, return a Dependence with as many details as possible. +// If PossiblyLoopIndependant is true will also test for intra-iteration +// dependencies. // Corresponds to Section 3.1 in the paper // // Practical Dependence Testing @@ -3547,6 +3555,10 @@ FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); ++TotalArrayPairs; + // No common loops AND don't care about loop independent dependencies + if (!PossiblyLoopIndependent && CommonLevels == 0) + return nullptr; + unsigned Pairs = 1; SmallVector Pair(Pairs); const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); @@ -3556,6 +3568,43 @@ Pair[0].Src = SrcSCEV; Pair[0].Dst = DstSCEV; + // DependenceAnalysis isn't able to deal with different + // access widths. If access widths are the same, but alignment is + // smaller than the store size, accesses could overlap. + // Reject these cases. + auto IsUnalignedLoadStore = [](const DataLayout *DL, const Instruction *I) { + const LoadInst *LI = dyn_cast(I); + const StoreInst *SI = dyn_cast(I); + Type *IType = (LI) ? LI->getType() : SI->getValueOperand()->getType(); + uint64_t Width = DL->getTypeStoreSize(IType); + unsigned Alignment = (LI) ? LI->getAlignment() : SI->getAlignment(); + + // Don't know the alignment, be conservative + if (Alignment == 0) + Alignment = DL->getABITypeAlign(IType).value(); + + return ((Alignment % Width) != 0); + }; + + auto GetMemOperandType = [](const Instruction *I) { + return (isa(I) ? dyn_cast(I)->getPointerOperandType() + : dyn_cast(I)->getPointerOperandType()); + }; + + auto &DL = F->getParent()->getDataLayout(); + bool ModifiedGEPOp = + !dyn_cast(SrcPtr) || !dyn_cast(DstPtr); + TypeSize SrcWriteSize = DL.getTypeStoreSize(GetMemOperandType(Src)); + TypeSize DstWriteSize = DL.getTypeStoreSize(GetMemOperandType(Dst)); + + if ((IsUnalignedLoadStore(&DL, Src) || IsUnalignedLoadStore(&DL, Dst) || + (SrcWriteSize != DstWriteSize)) && + ModifiedGEPOp) { + LLVM_DEBUG( + dbgs() << "Memory accesses are different widths from non-GEPs\n"); + return std::make_unique(Src, Dst); + } + if (Delinearize) { if (tryDelinearize(Src, Dst, Pair)) { LLVM_DEBUG(dbgs() << " delinearized\n"); Index: llvm/test/Analysis/DependenceAnalysis/OverlappingAddresses.ll =================================================================== --- /dev/null +++ llvm/test/Analysis/DependenceAnalysis/OverlappingAddresses.ll @@ -0,0 +1,35 @@ +; Verify that we do not consider overlapping accesses to be independent, +; even though the raw pointer values are different +; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 \ +; RUN: | FileCheck %s +; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.6.0" + +define void @overlapping0(i32* %A, i64 %n) nounwind uwtable ssp { +entry: + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %n + %arrayidx_cast = bitcast i32* %arrayidx to i64* + store i64 0, i64* %arrayidx_cast, align 4 + +; CHECK: da analyze - none! +; CHECK: da analyze - confused! +; CHECK: da analyze - confused! +; CHECK: da analyze - none +; CHECK: da analyze - confused! +; CHECK: da analyze - none! + + %add1 = add i64 %n, 1 + %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %add1 + %0 = load i32, i32* %arrayidx2, align 4 + + ; Even if the load and store types are the same, if their + ; alignments are smaller than the store size and they are + ; accesses through something other than a straightforward + ; gep, they can still overlap, as this access shows + %arrayidx2_cast = bitcast i32* %arrayidx2 to i64* + %1 = load i64, i64* %arrayidx2_cast, align 4 + ret void +} +