diff --git a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp --- a/llvm/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/llvm/lib/Transforms/Scalar/EarlyCSE.cpp @@ -1636,6 +1636,16 @@ auto *MSSA = UseMemorySSA ? &AM.getResult(F).getMSSA() : nullptr; + // DomTree updates are not always deterministic, so the child lists in the + // dominator tree are not sorted in any way (see discussion here + // https://lists.llvm.org/pipermail/llvm-dev/2021-September/152839.html). To + // get a deterministic result from EarlyCSE we need to make sure our DFS + // iteration over the DT is processing the nodes in a deterministic order. An + // easy solution is to recalculate the DT from scratch here (assuming that + // such an operation would populate the child lists in the DT implemenation in + // the same order every time). + DT.recalculate(F); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); if (!CSE.run()) @@ -1690,6 +1700,15 @@ auto *MSSA = UseMemorySSA ? &getAnalysis().getMSSA() : nullptr; + // DomTree updates are not always deterministic, so the child lists in the + // dominator tree are not sorted in any way. To get a deterministic result + // from EarlyCSE we need to make sure out DFS iterations over the DT is + // processing nodes in a deterministic order. An easy solution is to + // recalculate the DT from scratch here (assuming that such an operation + // would populate the child lists in the DT implemenation in the same order + // every time). + DT.recalculate(F); + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); return CSE.run(); diff --git a/llvm/test/Transforms/EarlyCSE/deterministic-domtree-iteration.ll b/llvm/test/Transforms/EarlyCSE/deterministic-domtree-iteration.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/EarlyCSE/deterministic-domtree-iteration.ll @@ -0,0 +1,153 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -S -passes='jump-threading,early-cse' | FileCheck %s + +; The idea behind this test case is to verify that the output from early-cse +; is deterministic, despite the fact that domtree updates from jump-threading +; aren't done in a deterministic order. +; +; Jump threading is using llvm::MergeBasicBlockIntoOnlyPred in this test +; case. So the important property of the test is probably that it triggers a +; call to that function, and that the PredsOfPredBB set that is used to +; populate Updates for the DomTreeUpdater is populated with more than one +; entry (and then the order in which updates are performed depends on +; iteration order for that set). As long as that behavior is kept, we end up +; with a DominatorTree for which the order in which child nodes are visited +; can be seen as random when iterating over the child node vectors. +; +; Here is should be mentioned that EarlyCSE isn't doing any kind of fixed +; point iteration or similar. So the end result may depend on the order in +; which nodes are processed. To get a deterministic result EarlyCSE need to +; somehow sort the nodes to ensure that they are visited in the same order, +; regardless of how they are sorted inside the DominatorTree child vectors. +; +; Unfortunately this test case is quite large, but it happenes to trigger the +; non-determinism in dominator tree updates quite often when being executed +; multipe times (I could typically see varying results when running the test +; less that 10 times in a row). + +@a = dso_local local_unnamed_addr global i16 0, align 1 + +; Function Attrs: nounwind +define dso_local i16 @g(i16 %a0, i16 %a1, i16 %a2, i16 %a3) local_unnamed_addr { +; CHECK-LABEL: @g( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i16 [[A0:%.*]], 0 +; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_COND1:%.*]], label [[INFINITE_LOOP:%.*]] +; CHECK: infinite.loop: +; CHECK-NEXT: br label [[INFINITE_LOOP]] +; CHECK: for.cond1: +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i16 [ [[RETVAL_311:%.*]], [[FOR_INC19:%.*]] ], [ undef, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[I_0:%.*]] = phi i16 [ [[I_312:%.*]], [[FOR_INC19]] ], [ undef, [[ENTRY]] ] +; CHECK-NEXT: [[TOBOOL2_NOT:%.*]] = icmp eq i16 [[A1:%.*]], 0 +; CHECK-NEXT: br i1 [[TOBOOL2_NOT]], label [[CLEANUP16_THREAD:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TOBOOL3_NOT:%.*]] = icmp eq i16 [[A2:%.*]], 0 +; CHECK-NEXT: br i1 [[TOBOOL3_NOT]], label [[CLEANUP16_THREAD]], label [[FOR_COND5_PREHEADER:%.*]] +; CHECK: for.cond5.preheader: +; CHECK-NEXT: [[TOBOOL8_NOT:%.*]] = icmp eq i16 [[A3:%.*]], 0 +; CHECK-NEXT: [[TOBOOL6_NOT31:%.*]] = icmp eq i16 [[I_0]], 0 +; CHECK-NEXT: br i1 [[TOBOOL6_NOT31]], label [[CLEANUP:%.*]], label [[FOR_BODY7:%.*]] +; CHECK: for.body7: +; CHECK-NEXT: [[I_132:%.*]] = phi i16 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[I_0]], [[FOR_COND5_PREHEADER]] ] +; CHECK-NEXT: br i1 [[TOBOOL8_NOT]], label [[FOR_INC]], label [[RETURN:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[INC]] = add i16 [[I_132]], 1 +; CHECK-NEXT: [[TOBOOL6_NOT:%.*]] = icmp eq i16 [[INC]], 0 +; CHECK-NEXT: br i1 [[TOBOOL6_NOT]], label [[CLEANUP16_THREAD]], label [[FOR_BODY7]] +; CHECK: cleanup: +; CHECK-NEXT: [[DOT26:%.*]] = select i1 [[TOBOOL8_NOT]], i32 0, i32 4 +; CHECK-NEXT: br i1 [[TOBOOL8_NOT]], label [[CLEANUP16_THREAD]], label [[CLEANUP16:%.*]] +; CHECK: cleanup16.thread: +; CHECK-NEXT: [[I_2:%.*]] = phi i16 [ [[I_0]], [[CLEANUP]] ], [ [[I_0]], [[IF_THEN]] ], [ [[I_0]], [[FOR_COND1]] ], [ 0, [[FOR_INC]] ] +; CHECK-NEXT: store i16 0, i16* @a, align 1 +; CHECK-NEXT: br label [[FOR_INC19]] +; CHECK: cleanup16: +; CHECK-NEXT: switch i32 [[DOT26]], label [[UNREACHABLE:%.*]] [ +; CHECK-NEXT: i32 0, label [[FOR_INC19]] +; CHECK-NEXT: i32 1, label [[RETURN]] +; CHECK-NEXT: i32 4, label [[FOR_END21:%.*]] +; CHECK-NEXT: ] +; CHECK: for.inc19: +; CHECK-NEXT: [[I_312]] = phi i16 [ [[I_2]], [[CLEANUP16_THREAD]] ], [ [[I_0]], [[CLEANUP16]] ] +; CHECK-NEXT: [[RETVAL_311]] = phi i16 [ [[RETVAL_0]], [[CLEANUP16_THREAD]] ], [ [[RETVAL_0]], [[CLEANUP16]] ] +; CHECK-NEXT: br label [[FOR_COND1]] +; CHECK: for.end21: +; CHECK-NEXT: br label [[RETURN]] +; CHECK: return: +; CHECK-NEXT: [[RETVAL_4:%.*]] = phi i16 [ 17, [[FOR_END21]] ], [ [[RETVAL_0]], [[CLEANUP16]] ], [ 1, [[FOR_BODY7]] ] +; CHECK-NEXT: ret i16 [[RETVAL_4]] +; CHECK: unreachable: +; CHECK-NEXT: unreachable +; +entry: + %tobool.not = icmp eq i16 %a0, 0 + br i1 %tobool.not, label %for.cond1, label %infinite.loop + +infinite.loop: ; preds = %infinite.loop, %entry + br label %infinite.loop + +for.cond1: ; preds = %for.inc19, %entry + %retval.0 = phi i16 [ %retval.3, %for.inc19 ], [ undef, %entry ] + %i.0 = phi i16 [ %i.3, %for.inc19 ], [ undef, %entry ] + %tobool2.not = icmp eq i16 %a1, 0 + br i1 %tobool2.not, label %if.end15, label %if.then + +if.then: ; preds = %for.cond1 + %tobool3.not = icmp eq i16 %a2, 0 + br i1 %tobool3.not, label %if.end15, label %for.cond5.preheader + +for.cond5.preheader: ; preds = %if.then + %tobool8.not = icmp eq i16 %a3, 0 + %tobool6.not31 = icmp eq i16 %i.0, 0 + br i1 %tobool6.not31, label %for.end10, label %for.body7 + +for.body7: ; preds = %for.inc, %for.cond5.preheader + %i.132 = phi i16 [ %inc, %for.inc ], [ %i.0, %for.cond5.preheader ] + br i1 %tobool8.not, label %for.inc, label %cleanup + +for.inc: ; preds = %for.body7 + %inc = add i16 %i.132, 1 + %tobool6.not = icmp eq i16 %inc, 0 + br i1 %tobool6.not, label %for.end10, label %for.body7 + +for.end10: ; preds = %for.inc, %for.cond5.preheader + %i.1.lcssa = phi i16 [ %i.0, %for.cond5.preheader ], [ 0, %for.inc ] + %.26 = select i1 %tobool8.not, i32 0, i32 4 + br label %cleanup + +cleanup: ; preds = %for.end10, %for.body7 + %i.128 = phi i16 [ %i.1.lcssa, %for.end10 ], [ %i.0, %for.body7 ] + %retval.1 = phi i16 [ %retval.0, %for.end10 ], [ 1, %for.body7 ] + %cond = phi i1 [ %tobool8.not, %for.end10 ], [ false, %for.body7 ] + %cleanup.dest.slot.0 = phi i32 [ %.26, %for.end10 ], [ 1, %for.body7 ] + br i1 %cond, label %if.end15, label %cleanup16 + +if.end15: ; preds = %cleanup, %if.then, %for.cond1 + %retval.2 = phi i16 [ %retval.1, %cleanup ], [ %retval.0, %if.then ], [ %retval.0, %for.cond1 ] + %i.2 = phi i16 [ %i.128, %cleanup ], [ %i.0, %if.then ], [ %i.0, %for.cond1 ] + store i16 0, i16* @a, align 1 + br label %cleanup16 + +cleanup16: ; preds = %if.end15, %cleanup + %retval.3 = phi i16 [ %retval.2, %if.end15 ], [ %retval.1, %cleanup ] + %i.3 = phi i16 [ %i.2, %if.end15 ], [ %i.128, %cleanup ] + %cleanup.dest.slot.1 = phi i32 [ 0, %if.end15 ], [ %cleanup.dest.slot.0, %cleanup ] + switch i32 %cleanup.dest.slot.1, label %unreachable [ + i32 0, label %for.inc19 + i32 1, label %return + i32 4, label %for.end21 + ] + +for.inc19: ; preds = %cleanup16 + br label %for.cond1 + +for.end21: ; preds = %cleanup16 + br label %return + +return: ; preds = %for.end21, %cleanup16 + %retval.4 = phi i16 [ 17, %for.end21 ], [ %retval.3, %cleanup16 ] + ret i16 %retval.4 + +unreachable: ; preds = %cleanup16 + unreachable +}