diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
--- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -1061,39 +1061,18 @@
   // Invariant loads don't participate in caching. Thus no need to reconcile.
   if (!isInvariantLoad && !Pair.second) {
     if (CacheInfo->Size != Loc.Size) {
-      bool ThrowOutEverything;
-      if (CacheInfo->Size.hasValue() && Loc.Size.hasValue()) {
-        // FIXME: We may be able to do better in the face of results with mixed
-        // precision. We don't appear to get them in practice, though, so just
-        // be conservative.
-        ThrowOutEverything =
-            CacheInfo->Size.isPrecise() != Loc.Size.isPrecise() ||
-            CacheInfo->Size.getValue() < Loc.Size.getValue();
-      } else {
-        // For our purposes, unknown size > all others.
-        ThrowOutEverything = !Loc.Size.hasValue();
-      }
-
-      if (ThrowOutEverything) {
-        // The query's Size is greater than the cached one. Throw out the
-        // cached data and proceed with the query at the greater size.
-        CacheInfo->Pair = BBSkipFirstBlockPair();
-        CacheInfo->Size = Loc.Size;
-        for (auto &Entry : CacheInfo->NonLocalDeps)
-          if (Instruction *Inst = Entry.getResult().getInst())
-            RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
-        CacheInfo->NonLocalDeps.clear();
-        // The cache is cleared (in the above line) so we will have lost
-        // information about blocks we have already visited. We therefore must
-        // assume that the cache information is incomplete.
-        IsIncomplete = true;
-      } else {
-        // This query's Size is less than the cached one. Conservatively restart
-        // the query using the greater size.
-        return getNonLocalPointerDepFromBB(
-            QueryInst, Pointer, Loc.getWithNewSize(CacheInfo->Size), isLoad,
-            StartBB, Result, Visited, SkipFirstBlock, IsIncomplete);
-      }
+      // The query's Size is not the same as the cached one. Throw out the
+      // cached data and proceed with the query at the new size.
+      CacheInfo->Pair = BBSkipFirstBlockPair();
+      CacheInfo->Size = Loc.Size;
+      for (auto &Entry : CacheInfo->NonLocalDeps)
+        if (Instruction *Inst = Entry.getResult().getInst())
+          RemoveFromReverseMap(ReverseNonLocalPtrDeps, Inst, CacheKey);
+      CacheInfo->NonLocalDeps.clear();
+      // The cache is cleared (in the above line) so we will have lost
+      // information about blocks we have already visited. We therefore must
+      // assume that the cache information is incomplete.
+      IsIncomplete = true;
     }
 
     // If the query's AATags are inconsistent with the cached one,
diff --git a/llvm/test/Analysis/MemoryDependenceAnalysis/pr63559.ll b/llvm/test/Analysis/MemoryDependenceAnalysis/pr63559.ll
new file
--- /dev/null
+++ b/llvm/test/Analysis/MemoryDependenceAnalysis/pr63559.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt -passes=gvn -S %s | FileCheck %s
+
+define void @test(ptr %array, i32 %v0, i32 %v1, i32 %v2) {
+; CHECK-LABEL: define void @test
+; CHECK-SAME: (ptr [[ARRAY:%.*]], i32 [[V0:%.*]], i32 [[V1:%.*]], i32 [[V2:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAY_2:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 32
+; CHECK-NEXT:    [[ARRAY_3:%.*]] = getelementptr inbounds i8, ptr [[ARRAY]], i64 40
+; CHECK-NEXT:    br label [[HEADER:%.*]]
+; CHECK:       header:
+; CHECK-NEXT:    [[ARRAY_2_PHI_1:%.*]] = phi ptr [ [[ARRAY_2]], [[ENTRY:%.*]] ], [ [[ARRAY_2_PHI_2:%.*]], [[LATCH:%.*]] ]
+; CHECK-NEXT:    [[ARRAY_3_PHI_1:%.*]] = phi ptr [ [[ARRAY_3]], [[ENTRY]] ], [ [[ARRAY_3_PHI_2:%.*]], [[LATCH]] ]
+; CHECK-NEXT:    [[LOCAL_I5:%.*]] = phi i64 [ 4, [[ENTRY]] ], [ [[LOCAL_I5_NEXT:%.*]], [[LATCH]] ]
+; CHECK-NEXT:    [[V18:%.*]] = icmp eq i32 [[V1]], 0
+; CHECK-NEXT:    br i1 [[V18]], label [[MERGE:%.*]], label [[THEN_BLOCK:%.*]]
+; CHECK:       then_block:
+; CHECK-NEXT:    [[ARRAY_RELOC_1:%.*]] = call ptr @realloc(ptr [[ARRAY]])
+; CHECK-NEXT:    [[ARRAY_2_REMAT:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_RELOC_1]], i64 32
+; CHECK-NEXT:    [[ARRAY_3_REMAT:%.*]] = getelementptr inbounds i8, ptr [[ARRAY_RELOC_1]], i64 40
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[ARRAY_3_PHI_2]] = phi ptr [ [[ARRAY_3_PHI_1]], [[HEADER]] ], [ [[ARRAY_3_REMAT]], [[THEN_BLOCK]] ]
+; CHECK-NEXT:    [[ARRAY_2_PHI_2]] = phi ptr [ [[ARRAY_2_PHI_1]], [[HEADER]] ], [ [[ARRAY_2_REMAT]], [[THEN_BLOCK]] ]
+; CHECK-NEXT:    [[V22:%.*]] = icmp eq i32 [[V0]], 0
+; CHECK-NEXT:    br i1 [[V22]], label [[ITER_PEEL:%.*]], label [[SIDE_EXIT:%.*]]
+; CHECK:       iter.peel:
+; CHECK-NEXT:    [[VAL_ARRAY_2_3_1:%.*]] = load <2 x i64>, ptr [[ARRAY_2_PHI_2]], align 8
+; CHECK-NEXT:    [[V24:%.*]] = insertelement <2 x i64> poison, i64 [[LOCAL_I5]], i64 0
+; CHECK-NEXT:    [[V25:%.*]] = shufflevector <2 x i64> [[V24]], <2 x i64> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT:    [[VAL_ARRAY_2_3_2:%.*]] = add <2 x i64> [[VAL_ARRAY_2_3_1]], [[V25]]
+; CHECK-NEXT:    store <2 x i64> [[VAL_ARRAY_2_3_2]], ptr [[ARRAY_2_PHI_2]], align 8
+; CHECK-NEXT:    [[VAL_ARRAY_3_1:%.*]] = load atomic i64, ptr [[ARRAY_3_PHI_2]] unordered, align 8
+; CHECK-NEXT:    [[VAL_ARRAY_3_2:%.*]] = mul i64 [[VAL_ARRAY_3_1]], -1802
+; CHECK-NEXT:    store atomic i64 [[VAL_ARRAY_3_2]], ptr [[ARRAY_3_PHI_2]] unordered, align 8
+; CHECK-NEXT:    [[V30:%.*]] = icmp eq i32 [[V2]], 0
+; CHECK-NEXT:    br i1 [[V30]], label [[LATCH]], label [[SIDE_EXIT]]
+; CHECK:       latch:
+; CHECK-NEXT:    [[VAL_ARRAY_2_3_3:%.*]] = load <2 x i64>, ptr [[ARRAY_2_PHI_2]], align 8
+; CHECK-NEXT:    [[VAL_ARRAY_2_3_4:%.*]] = add <2 x i64> [[VAL_ARRAY_2_3_3]], [[V25]]
+; CHECK-NEXT:    store <2 x i64> [[VAL_ARRAY_2_3_4]], ptr [[ARRAY_2_PHI_2]], align 8
+; CHECK-NEXT:    [[LOCAL_I5_NEXT]] = add nuw nsw i64 [[LOCAL_I5]], 2
+; CHECK-NEXT:    [[TEST_I5:%.*]] = icmp ugt i64 [[LOCAL_I5]], 250
+; CHECK-NEXT:    br i1 [[TEST_I5]], label [[RETURN:%.*]], label [[HEADER]]
+; CHECK:       return:
+; CHECK-NEXT:    ret void
+; CHECK:       side_exit:
+; CHECK-NEXT:    [[VAL_ARRAY_2_1:%.*]] = load atomic i64, ptr [[ARRAY_2_PHI_2]] unordered, align 8
+; CHECK-NEXT:    [[VAL_ARRAY_2_2:%.*]] = add i64 [[VAL_ARRAY_2_1]], [[LOCAL_I5]]
+; CHECK-NEXT:    store atomic i64 [[VAL_ARRAY_2_2]], ptr [[ARRAY_2_PHI_2]] unordered, align 8
+; CHECK-NEXT:    ret void
+;
+entry:
+  %array.2 = getelementptr inbounds i8, ptr %array, i64 32
+  %array.3 = getelementptr inbounds i8, ptr %array, i64 40
+  br label %header
+
+header:
+  %array.2.phi.1 = phi ptr [ %array.2, %entry ], [ %array.2.phi.2, %latch ]
+  %array.3.phi.1 = phi ptr [ %array.3, %entry ], [ %array.3.phi.2, %latch ]
+  %local_i5 = phi i64 [ 4, %entry ], [ %local_i5.next, %latch ]
+  %v18 = icmp eq i32 %v1, 0
+  br i1 %v18, label %merge, label %then_block
+
+then_block:
+  %array.reloc.1 = call ptr @realloc(ptr %array)
+  %array.2.remat = getelementptr inbounds i8, ptr %array.reloc.1, i64 32
+  %array.3.remat = getelementptr inbounds i8, ptr %array.reloc.1, i64 40
+  br label %merge
+
+merge:
+  %array.3.phi.2 = phi ptr [ %array.3.phi.1, %header ], [ %array.3.remat, %then_block ]
+  %array.2.phi.2 = phi ptr [ %array.2.phi.1, %header ], [ %array.2.remat, %then_block ]
+  %v22 = icmp eq i32 %v0, 0
+  br i1 %v22, label %iter.peel, label %side_exit
+
+iter.peel:
+  %val.array.2_3.1 = load <2 x i64>, ptr %array.2.phi.2, align 8
+  %v24 = insertelement <2 x i64> poison, i64 %local_i5, i64 0
+  %v25 = shufflevector <2 x i64> %v24, <2 x i64> poison, <2 x i32> zeroinitializer
+  %val.array.2_3.2 = add <2 x i64> %val.array.2_3.1, %v25
+  store <2 x i64> %val.array.2_3.2, ptr %array.2.phi.2, align 8
+  %val.array.3.1 = load atomic i64, ptr %array.3.phi.2 unordered, align 8
+  %val.array.3.2 = mul i64 %val.array.3.1, -1802
+  store atomic i64 %val.array.3.2, ptr %array.3.phi.2 unordered, align 8
+  %v30 = icmp eq i32 %v2, 0
+  br i1 %v30, label %latch, label %side_exit
+
+latch:
+  %val.array.2_3.3 = load <2 x i64>, ptr %array.2.phi.2, align 8
+  %val.array.2_3.4 = add <2 x i64> %val.array.2_3.3, %v25
+  store <2 x i64> %val.array.2_3.4, ptr %array.2.phi.2, align 8
+  %local_i5.next = add nuw nsw i64 %local_i5, 2
+  %test_i5 = icmp ugt i64 %local_i5, 250
+  br i1 %test_i5, label %return, label %header
+
+return:
+  ret void
+
+side_exit:
+  %array.2.phi.2.lcssa = phi ptr [ %array.2.phi.2, %iter.peel ], [ %array.2.phi.2, %merge ]
+  %val.array.2.1 = load atomic i64, ptr %array.2.phi.2.lcssa unordered, align 8
+  %val.array.2.2 = add i64 %val.array.2.1, %local_i5
+  store atomic i64 %val.array.2.2, ptr %array.2.phi.2.lcssa unordered, align 8
+  ret void
+}
+
+declare ptr @realloc(ptr)
diff --git a/llvm/test/Transforms/GVN/PRE/rle.ll b/llvm/test/Transforms/GVN/PRE/rle.ll
--- a/llvm/test/Transforms/GVN/PRE/rle.ll
+++ b/llvm/test/Transforms/GVN/PRE/rle.ll
@@ -1057,7 +1057,7 @@
 ; LE-NEXT:    [[TMP0:%.*]] = trunc i32 [[V_1_32]] to i8
 ; LE-NEXT:    br label [[LOOP:%.*]]
 ; LE:       loop:
-; LE-NEXT:    [[V_I:%.*]] = phi i8 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[V_I_PRE:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
+; LE-NEXT:    [[V_I:%.*]] = phi i8 [ [[TMP0]], [[ENTRY:%.*]] ], [ [[TMP2:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
 ; LE-NEXT:    [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ]
 ; LE-NEXT:    [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]]
 ; LE-NEXT:    call void @use.i8(i8 [[V_I]])
@@ -1065,10 +1065,10 @@
 ; LE-NEXT:    call void @use.i32(i32 [[V_I_32]])
 ; LE-NEXT:    [[I_INC]] = add i64 [[I]], 1
 ; LE-NEXT:    [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
+; LE-NEXT:    [[TMP1:%.*]] = lshr i32 [[V_I_32]], 8
+; LE-NEXT:    [[TMP2]] = trunc i32 [[TMP1]] to i8
 ; LE-NEXT:    br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]]
 ; LE:       loop.loop_crit_edge:
-; LE-NEXT:    [[P_I_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i64 [[I_INC]]
-; LE-NEXT:    [[V_I_PRE]] = load i8, ptr [[P_I_PHI_TRANS_INSERT]], align 1
 ; LE-NEXT:    br label [[LOOP]]
 ; LE:       exit:
 ; LE-NEXT:    ret void
@@ -1084,7 +1084,7 @@
 ; BE-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i8
 ; BE-NEXT:    br label [[LOOP:%.*]]
 ; BE:       loop:
-; BE-NEXT:    [[V_I:%.*]] = phi i8 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[V_I_PRE:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
+; BE-NEXT:    [[V_I:%.*]] = phi i8 [ [[TMP1]], [[ENTRY:%.*]] ], [ [[TMP3:%.*]], [[LOOP_LOOP_CRIT_EDGE:%.*]] ]
 ; BE-NEXT:    [[I:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[I_INC:%.*]], [[LOOP_LOOP_CRIT_EDGE]] ]
 ; BE-NEXT:    [[P_I:%.*]] = getelementptr i8, ptr [[P]], i64 [[I]]
 ; BE-NEXT:    call void @use.i8(i8 [[V_I]])
@@ -1092,10 +1092,10 @@
 ; BE-NEXT:    call void @use.i32(i32 [[V_I_32]])
 ; BE-NEXT:    [[I_INC]] = add i64 [[I]], 1
 ; BE-NEXT:    [[CMP:%.*]] = icmp ne i64 [[I_INC]], 64
+; BE-NEXT:    [[TMP2:%.*]] = lshr i32 [[V_I_32]], 16
+; BE-NEXT:    [[TMP3]] = trunc i32 [[TMP2]] to i8
 ; BE-NEXT:    br i1 [[CMP]], label [[LOOP_LOOP_CRIT_EDGE]], label [[EXIT:%.*]]
 ; BE:       loop.loop_crit_edge:
-; BE-NEXT:    [[P_I_PHI_TRANS_INSERT:%.*]] = getelementptr i8, ptr [[P]], i64 [[I_INC]]
-; BE-NEXT:    [[V_I_PRE]] = load i8, ptr [[P_I_PHI_TRANS_INSERT]], align 1
 ; BE-NEXT:    br label [[LOOP]]
 ; BE:       exit:
 ; BE-NEXT:    ret void