diff --git a/llvm/include/llvm/Analysis/DependenceAnalysis.h b/llvm/include/llvm/Analysis/DependenceAnalysis.h --- a/llvm/include/llvm/Analysis/DependenceAnalysis.h +++ b/llvm/include/llvm/Analysis/DependenceAnalysis.h @@ -506,8 +506,8 @@ /// e - 5 /// f - 6 /// g - 7 = MaxLevels - void establishNestingLevels(const Instruction *Src, - const Instruction *Dst); + void establishNestingLevels(const Instruction *Src, const Instruction *Dst, + const SmallVector Pairs); unsigned CommonLevels, SrcLevels, MaxLevels; diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -747,8 +747,9 @@ // e - 5 // f - 6 // g - 7 = MaxLevels -void DependenceInfo::establishNestingLevels(const Instruction *Src, - const Instruction *Dst) { +void DependenceInfo::establishNestingLevels( + const Instruction *Src, const Instruction *Dst, + const SmallVector Pairs) { const BasicBlock *SrcBlock = Src->getParent(); const BasicBlock *DstBlock = Dst->getParent(); unsigned SrcLevel = LI->getLoopDepth(SrcBlock); @@ -756,6 +757,30 @@ const Loop *SrcLoop = LI->getLoopFor(SrcBlock); const Loop *DstLoop = LI->getLoopFor(DstBlock); SrcLevels = SrcLevel; + // If any of the subscripts for an instruction have a recurrence with a loop + // level that does not contain that instruction, we need to update the + // corresponding level number, to ensure a distinct loop is assigned. This + // effectively treats the instruction as if it was present inside the + // recurrence's loop. This is conservatively correct, since it may only cause + // more constraints to be propaged to the common levels. + if (SrcLoop != DstLoop) { + const SCEV *InnerMostSrcExpr = Pairs[Pairs.size() - 1].Src; + const SCEV *InnerMostDstExpr = Pairs[Pairs.size() - 1].Dst; + const SCEVAddRecExpr *SrcAddRec = + dyn_cast_or_null(InnerMostSrcExpr); + const SCEVAddRecExpr *DstAddRec = + dyn_cast_or_null(InnerMostDstExpr); + if (SrcAddRec && mapSrcLoop(SrcAddRec->getLoop()) > SrcLevel) { + SrcLoop = SrcAddRec->getLoop(); + SrcLevels = LI->getLoopDepth(SrcLoop->getHeader()); + SrcLevel = SrcLevels; + } + if (DstAddRec && mapDstLoop(DstAddRec->getLoop()) > DstLevel) { + DstLoop = DstAddRec->getLoop(); + DstLevel = LI->getLoopDepth(DstLoop->getHeader()); + } + } + MaxLevels = SrcLevel + DstLevel; while (SrcLevel > DstLevel) { SrcLoop = SrcLoop->getParentLoop(); @@ -774,7 +799,6 @@ MaxLevels -= CommonLevels; } - // Given one of the loops containing the source, return // its level index in our numbering scheme. unsigned DependenceInfo::mapSrcLoop(const Loop *SrcLoop) const { @@ -787,6 +811,8 @@ unsigned DependenceInfo::mapDstLoop(const Loop *DstLoop) const { unsigned D = DstLoop->getLoopDepth(); if (D > CommonLevels) + // This tries to make sure that we assign unique numbers to src and dst when + // the memory accesses reside in different loops that have the same depth. return D - CommonLevels + SrcLevels; else return D; @@ -3558,16 +3584,10 @@ break; // The underlying objects alias; test accesses for dependence. } - // establish loop nesting levels - establishNestingLevels(Src, Dst); - LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); - LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); - - FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); ++TotalArrayPairs; unsigned Pairs = 1; - SmallVector Pair(Pairs); + SmallVector Pair(Pairs); const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); const SCEV *DstSCEV = SE->getSCEV(DstPtr); LLVM_DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); @@ -3592,6 +3612,13 @@ } } + // establish loop nesting levels + establishNestingLevels(Src, Dst, Pair); + LLVM_DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); + LLVM_DEBUG(dbgs() << " maximum nesting levels = " << MaxLevels << "\n"); + + FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); + for (unsigned P = 0; P < Pairs; ++P) { Pair[P].Loops.resize(MaxLevels + 1); Pair[P].GroupLoops.resize(MaxLevels + 1); @@ -3972,13 +3999,8 @@ AA, F->getParent()->getDataLayout(), MemoryLocation::get(Dst), MemoryLocation::get(Src)) == AliasResult::MustAlias); - // establish loop nesting levels - establishNestingLevels(Src, Dst); - - FullDependence Result(Src, Dst, false, CommonLevels); - unsigned Pairs = 1; - SmallVector Pair(Pairs); + SmallVector Pair(Pairs); const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); const SCEV *DstSCEV = SE->getSCEV(DstPtr); Pair[0].Src = SrcSCEV; @@ -3991,6 +4013,11 @@ } } + // establish loop nesting levels + establishNestingLevels(Src, Dst, Pair); + + FullDependence Result(Src, Dst, false, CommonLevels); + for (unsigned P = 0; P < Pairs; ++P) { Pair[P].Loops.resize(MaxLevels + 1); Pair[P].GroupLoops.resize(MaxLevels + 1); diff --git a/llvm/test/Analysis/DependenceAnalysis/MismatchingNestLevels.ll b/llvm/test/Analysis/DependenceAnalysis/MismatchingNestLevels.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Analysis/DependenceAnalysis/MismatchingNestLevels.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -disable-output "-passes=print" -aa-pipeline=basic-aa 2>&1 | FileCheck %s + +; CHECK-LABEL: 'Dependence Analysis' for function 'test1': +; CHECK: Src: %src = load double, double* %arrayidx295, align 8 --> Dst: %src = load double, double* %arrayidx295, align 8 +; CHECK-NEXT: da analyze - consistent input [S 0]! +; CHECK: Src: %src = load double, double* %arrayidx295, align 8 --> Dst: %1 = load double, double* %arrayidx98, align 8 +; CHECK-NEXT: da analyze - input [S|<]! +; CHECK: Src: %1 = load double, double* %arrayidx98, align 8 --> Dst: %1 = load double, double* %arrayidx98, align 8 +; CHECK-NEXT: da analyze - consistent input [S]! + +define dso_local void @test1([512 x double]* %temp) local_unnamed_addr { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.end63, %entry + br label %for.body272 + +for.body272: ; preds = %for.body272, %for.cond1.preheader + %r.1597 = phi i32 [ 1, %for.cond1.preheader ], [ %add277, %for.body272 ] + %idxprom274 = zext i32 %r.1597 to i64 + %add277 = add nuw nsw i32 %r.1597, 1 + %arrayidx295 = getelementptr inbounds [512 x double], [512 x double]* %temp, i64 %idxprom274, i64 510 + %src = load double, double* %arrayidx295, align 8 + br i1 undef, label %for.body272, label %for.body6 + +for.body6: ; preds = %for.body6, %for.body272 + %c.2594 = phi i32 [ 1, %for.body272 ], [ %add27, %for.body6 ] + %add27 = add nuw nsw i32 %c.2594, 1 + br i1 undef, label %for.body6, label %for.end63 + +for.end63: ; preds = %for.body6 + %c.1.lcssa = phi i32 [ %add27, %for.body6 ] + %sub96 = add nsw i32 %c.1.lcssa, -1 + %0 = zext i32 %sub96 to i64 + %arrayidx98 = getelementptr inbounds [512 x double], [512 x double]* %temp, i64 0, i64 %0 + %1 = load double, double* %arrayidx98, align 8 + br label %for.cond1.preheader +} + +; CHECK-LABEL: 'Dependence Analysis' for function 'test2': +; CHECK: Src: %1 = load double, double* %arrayidx98, align 8 --> Dst: %1 = load double, double* %arrayidx98, align 8 +; CHECK-NEXT: da analyze - consistent input [S]! +; CHECK: Src: %1 = load double, double* %arrayidx98, align 8 --> Dst: %2 = load double, double* %arrayidx295, align 8 +; CHECK-NEXT: da analyze - input [S|<]! +; CHECK: Src: %2 = load double, double* %arrayidx295, align 8 --> Dst: %2 = load double, double* %arrayidx295, align 8 +; CHECK-NEXT: da analyze - consistent input [S 0]! + +define dso_local void @test2([512 x double]* %temp) local_unnamed_addr #0 { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %for.body272, %entry + br label %for.body6 + +for.body6: ; preds = %for.body6, %for.cond1.preheader + %c.2594 = phi i32 [ 1, %for.cond1.preheader ], [ %add27, %for.body6 ] + %add27 = add nuw nsw i32 %c.2594, 1 + br i1 undef, label %for.body6, label %for.end63 + +for.end63: ; preds = %for.body6 + %c.1.lcssa = phi i32 [ %add27, %for.body6 ] + %sub96 = add nsw i32 %c.1.lcssa, -1 + %0 = zext i32 %sub96 to i64 + %arrayidx98 = getelementptr inbounds [512 x double], [512 x double]* %temp, i64 0, i64 %0 + %1 = load double, double* %arrayidx98, align 8 + br label %for.body272 + +for.body272: ; preds = %for.body272, %for.end63 + %r.1597 = phi i32 [ 1, %for.end63 ], [ %add277, %for.body272 ] + %idxprom274 = zext i32 %r.1597 to i64 + %add277 = add nuw nsw i32 %r.1597, 1 + %arrayidx295 = getelementptr inbounds [512 x double], [512 x double]* %temp, i64 %idxprom274, i64 510 + %2 = load double, double* %arrayidx295, align 8 + br i1 undef, label %for.body272, label %for.cond1.preheader +}