Index: llvm/include/llvm/Analysis/DependenceAnalysis.h
===================================================================
--- llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -282,12 +282,11 @@
 
     /// depends - Tests for a dependence between the Src and Dst instructions.
    /// Returns NULL if no dependence; otherwise, returns a Dependence (or a
     /// FullDependence) with as much information as can be gleaned.
-    /// The flag PossiblyLoopIndependent should be set by the caller
-    /// if it appears that control flow can reach from Src to Dst
-    /// without traversing a loop back edge.
+    /// If PossiblyIntraIteration is true, the analysis will also test for
+    /// intra-iteration dependencies.
     std::unique_ptr<Dependence> depends(Instruction *Src, Instruction *Dst,
-                                        bool PossiblyLoopIndependent);
+                                        bool PossiblyIntraIteration);
 
     /// getSplitIteration - Give a dependence that's splittable at some
     /// particular level, return the iteration that should be used to split
Index: llvm/lib/Analysis/DependenceAnalysis.cpp
===================================================================
--- llvm/lib/Analysis/DependenceAnalysis.cpp
+++ llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -259,10 +259,10 @@
 // FullDependence methods
 
 FullDependence::FullDependence(Instruction *Source, Instruction *Destination,
-                               bool PossiblyLoopIndependent,
+                               bool PossiblyIntraIteration,
                                unsigned CommonLevels)
     : Dependence(Source, Destination), Levels(CommonLevels),
-      LoopIndependent(PossiblyLoopIndependent) {
+      LoopIndependent(PossiblyIntraIteration) {
   Consistent = true;
   if (CommonLevels)
     DV = std::make_unique<DVEntry[]>(CommonLevels);
@@ -3512,6 +3512,8 @@
 // depends -
 // Returns NULL if there is no dependence.
 // Otherwise, return a Dependence with as many details as possible.
+// If PossiblyIntraIteration is true, the analysis will also test for
+// intra-iteration dependencies.
 // Corresponds to Section 3.1 in the paper
 //
 //            Practical Dependence Testing
@@ -3522,9 +3524,9 @@
 // up to date with respect to this routine.
 std::unique_ptr<Dependence>
 DependenceInfo::depends(Instruction *Src, Instruction *Dst,
-                        bool PossiblyLoopIndependent) {
+                        bool PossiblyIntraIteration) {
   if (Src == Dst)
-    PossiblyLoopIndependent = false;
+    PossiblyIntraIteration = false;
 
   if (!(Src->mayReadOrWriteMemory() && Dst->mayReadOrWriteMemory()))
     // if both instructions don't reference memory, there's no dependence
@@ -3562,9 +3564,14 @@
   LLVM_DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
   LLVM_DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
 
-  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
+  FullDependence Result(Src, Dst, PossiblyIntraIteration, CommonLevels);
   ++TotalArrayPairs;
 
+  if (!PossiblyIntraIteration && CommonLevels == 0) {
+    LLVM_DEBUG(dbgs() << "no common loops and no intra-iteration analysis\n");
+    return nullptr;
+  }
+
   unsigned Pairs = 1;
   SmallVector<Subscript, 2> Pair(Pairs);
   const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
@@ -3583,6 +3590,40 @@
   }
   Pair[0].Src = SrcSCEV;
   Pair[0].Dst = DstSCEV;
+  const DataLayout &DL = F->getParent()->getDataLayout();
+
+  // DependenceAnalysis isn't able to deal with differing access widths.
+  // If the access widths are the same but the alignment is smaller than
+  // the store size, the two accesses could still partially overlap.
+  // Reject these cases.
+  auto IsUnalignedLoadStore = [&](const Instruction *I) {
+    const LoadInst *LI = dyn_cast<LoadInst>(I);
+    const StoreInst *SI = dyn_cast<StoreInst>(I);
+    Type *IType = (LI) ? LI->getType() : SI->getValueOperand()->getType();
+    uint64_t Width = DL.getTypeStoreSize(IType);
+    uint64_t Alignment = (LI) ? LI->getAlign().value() : SI->getAlign().value();
+
+    return ((Alignment % Width) != 0);
+  };
+
+  bool ModifiedGEPOp =
+      !dyn_cast<GEPOperator>(SrcPtr) || !dyn_cast<GEPOperator>(DstPtr);
+  Type *SrcOpType =
+      static_cast<PointerType *>(MemoryLocation::get(Src).Ptr->getType())
+          ->getElementType();
+  Type *DstOpType =
+      static_cast<PointerType *>(MemoryLocation::get(Dst).Ptr->getType())
+          ->getElementType();
+  TypeSize SrcWriteSize = DL.getTypeStoreSize(SrcOpType);
+  TypeSize DstWriteSize = DL.getTypeStoreSize(DstOpType);
+
+  if ((IsUnalignedLoadStore(Src) || IsUnalignedLoadStore(Dst) ||
+       (SrcWriteSize != DstWriteSize)) &&
+      ModifiedGEPOp) {
+    LLVM_DEBUG(
+        dbgs() << "Memory accesses are different widths from non-GEPs\n");
+    return std::make_unique<Dependence>(Src, Dst);
+  }
 
   if (Delinearize) {
     if (tryDelinearize(Src, Dst, Pair)) {
@@ -3880,7 +3921,7 @@
     if (CompleteLoops[II])
       Result.DV[II - 1].Scalar = false;
 
-  if (PossiblyLoopIndependent) {
+  if (PossiblyIntraIteration) {
     // Make sure the LoopIndependent flag is set correctly.
     // All directions must include equal, otherwise no
     // loop-independent dependence is possible.
Index: llvm/test/Analysis/DependenceAnalysis/OverlappingAddresses.ll
===================================================================
--- /dev/null
+++ llvm/test/Analysis/DependenceAnalysis/OverlappingAddresses.ll
@@ -0,0 +1,35 @@
+; Verify that we do not consider overlapping accesses to be independent,
+; even though the raw pointer values are different.
+; RUN: opt < %s -disable-output "-passes=print<da>" -aa-pipeline=basic-aa 2>&1 \
+; RUN: | FileCheck %s
+; RUN: opt < %s -analyze -enable-new-pm=0 -basic-aa -da | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.6.0"
+
+define void @overlapping0(i32* %A, i64 %n) nounwind uwtable ssp {
+entry:
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %n
+  %arrayidx_cast = bitcast i32* %arrayidx to i64*
+  store i64 0, i64* %arrayidx_cast, align 4
+
+; CHECK: da analyze - none!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none
+; CHECK: da analyze - confused!
+; CHECK: da analyze - none!
+
+  %add1 = add i64 %n, 1
+  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %add1
+  %0 = load i32, i32* %arrayidx2, align 4
+
+  ; Even if the load and store types are the same, if their
+  ; alignments are smaller than the store size and they are
+  ; accessed through something other than a straightforward
+  ; GEP, they can still overlap, as this access shows.
+  %arrayidx2_cast = bitcast i32* %arrayidx2 to i64*
+  %1 = load i64, i64* %arrayidx2_cast, align 4
+  ret void
+}
+
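
Note (reviewer sketch, not part of the patch): a minimal illustration of how a
client pass might call the renamed entry point after this change. The
DependenceInfo instance DI, the instructions Src and Dst, and the helper name
hasIntraIterationDependence are illustrative assumptions, not taken from the
patch; only the depends/isConfused/isLoopIndependent calls are existing API.

  #include "llvm/Analysis/DependenceAnalysis.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // Hypothetical helper: returns true if DependenceAnalysis cannot rule out a
  // dependence between Src and Dst within a single iteration of their common
  // loop nest.
  static bool hasIntraIterationDependence(DependenceInfo &DI, Instruction *Src,
                                          Instruction *Dst) {
    // Request intra-iteration (formerly "loop-independent") testing as well.
    std::unique_ptr<Dependence> D =
        DI.depends(Src, Dst, /*PossiblyIntraIteration=*/true);
    if (!D)
      return false; // proven: no dependence of any kind
    if (D->isConfused())
      return true; // the analysis gave up; be conservative
    return D->isLoopIndependent();
  }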