diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -125,8 +125,10 @@ // necessary to compare uses/defs in the same block. Doing so allows us to walk // the minimum number of instructions necessary to compute our def/use ordering. struct ValueDFS_Compare { + DominatorTree &DT; OrderedInstructions &OI; - ValueDFS_Compare(OrderedInstructions &OI) : OI(OI) {} + ValueDFS_Compare(DominatorTree &DT, OrderedInstructions &OI) + : DT(DT), OI(OI) {} bool operator()(const ValueDFS &A, const ValueDFS &B) const { if (&A == &B) @@ -145,9 +147,13 @@ if (SameBlock && A.LocalNum == LN_Last && B.LocalNum == LN_Last) return comparePHIRelated(A, B); + bool isADef = A.Def; + bool isAU = A.U; + bool isBDef = B.Def; + bool isBU = B.U; if (!SameBlock || A.LocalNum != LN_Middle || B.LocalNum != LN_Middle) - return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, A.Def, A.U) < - std::tie(B.DFSIn, B.DFSOut, B.LocalNum, B.Def, B.U); + return std::tie(A.DFSIn, A.DFSOut, A.LocalNum, isADef, isAU) < + std::tie(B.DFSIn, B.DFSOut, B.LocalNum, isBDef, isBU); return localComesBefore(A, B); } @@ -164,10 +170,41 @@ // For two phi related values, return the ordering. bool comparePHIRelated(const ValueDFS &A, const ValueDFS &B) const { - auto &ABlockEdge = getBlockEdge(A); - auto &BBlockEdge = getBlockEdge(B); - // Now sort by block edge and then defs before uses. - return std::tie(ABlockEdge, A.Def, A.U) < std::tie(BBlockEdge, B.Def, B.U); + BasicBlock *ASrc, *ADest, *BSrc, *BDest; + std::tie(ASrc, ADest) = getBlockEdge(A); + std::tie(BSrc, BDest) = getBlockEdge(B); + +#ifndef NDEBUG + // This function should only be used for values in the same BB, check that. + DomTreeNode *DomASrc = DT.getNode(ASrc); + DomTreeNode *DomBSrc = DT.getNode(BSrc); + assert(DomASrc->getDFSNumIn() == (unsigned)A.DFSIn && + DomASrc->getDFSNumOut() == (unsigned)A.DFSOut && + "DFS numbers for A should match the ones of the source block"); + assert(DomBSrc->getDFSNumIn() == (unsigned)B.DFSIn && + DomBSrc->getDFSNumOut() == (unsigned)B.DFSOut && + "DFS numbers for B should match the ones of the source block"); + assert(A.DFSIn == B.DFSIn && A.DFSOut == B.DFSOut && + "Values must be in the same block"); +#endif + (void)ASrc; + (void)BSrc; + + // Use DFS numbers to compare destination blocks, to guarantee a + // deterministic order. + DomTreeNode *DomADest = DT.getNode(ADest); + DomTreeNode *DomBDest = DT.getNode(BDest); + unsigned AIn = DomADest->getDFSNumIn(); + unsigned AOut = DomADest->getDFSNumOut(); + unsigned BIn = DomBDest->getDFSNumIn(); + unsigned BOut = DomBDest->getDFSNumOut(); + bool isADef = A.Def; + bool isAU = A.U; + bool isBDef = B.Def; + bool isBU = B.U; + // Now sort by edge destination and then defs before uses. + return std::tie(AIn, AOut, isADef, isAU) < + std::tie(BIn, BOut, isBDef, isBU); } // Get the definition of an instruction that occurs in the middle of a block. @@ -567,7 +604,7 @@ // TODO: Use this algorithm to perform fast single-variable renaming in // promotememtoreg and memoryssa. void PredicateInfo::renameUses(SmallVectorImpl &OpsToRename) { - ValueDFS_Compare Compare(OI); + ValueDFS_Compare Compare(DT, OI); // Compute liveness, and rename in O(uses) per Op. for (auto *Op : OpsToRename) { LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n"); diff --git a/llvm/test/Transforms/SCCP/ipsccp-predinfo-uselists.ll b/llvm/test/Transforms/SCCP/ipsccp-predinfo-uselists.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SCCP/ipsccp-predinfo-uselists.ll @@ -0,0 +1,76 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -ipsccp -preserve-ll-uselistorder -S %s | FileCheck %s + +declare i32 @hoge() + +define dso_local i32 @ham(i8* %arg, i8* %arg1) { +; CHECK-LABEL: @ham( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[TMP:%.*]] = alloca i32 +; CHECK-NEXT: [[TMP2:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[BB19:%.*]] +; CHECK: bb4: +; CHECK-NEXT: br label [[BB6:%.*]] +; CHECK: bb6: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @hoge() +; CHECK-NEXT: store i32 [[TMP7]], i32* [[TMP]] +; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 912730082 +; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP]] +; CHECK-NEXT: br i1 [[TMP9]], label [[BB11:%.*]], label [[BB16:%.*]] +; CHECK: bb11: +; CHECK-NEXT: unreachable +; CHECK: bb13: +; CHECK-NEXT: br label [[BB14:%.*]] +; CHECK: bb14: +; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP]] +; CHECK-NEXT: br label [[BB16]] +; CHECK: bb16: +; CHECK-NEXT: [[TMP17:%.*]] = phi i32 [ [[TMP10]], [[BB6]] ], [ 0, [[BB14]] ] +; CHECK-NEXT: br label [[BB19]] +; CHECK: bb18: +; CHECK-NEXT: unreachable +; CHECK: bb19: +; CHECK-NEXT: br label [[BB20:%.*]] +; CHECK: bb20: +; CHECK-NEXT: indirectbr i8* null, [label [[BB4:%.*]], label [[BB13:%.*]], label %bb18] +; +bb: + %tmp = alloca i32 + %tmp2 = alloca i32, align 4 + br label %bb19 + +bb4: ; preds = %bb20 + br label %bb6 + +bb6: ; preds = %bb4 + %tmp7 = call i32 @hoge() + store i32 %tmp7, i32* %tmp + %tmp8 = load i32, i32* %tmp + %tmp9 = icmp eq i32 %tmp8, 912730082 + %tmp10 = load i32, i32* %tmp + br i1 %tmp9, label %bb11, label %bb16 + +bb11: ; preds = %bb6 + unreachable + +bb13: ; preds = %bb20 + br label %bb14 + +bb14: ; preds = %bb13 + %tmp15 = load i32, i32* %tmp + br label %bb16 + +bb16: ; preds = %bb14, %bb6 + %tmp17 = phi i32 [ %tmp10, %bb6 ], [ 0, %bb14 ] + br label %bb19 + +bb18: ; preds = %bb20 + unreachable + +bb19: ; preds = %bb16, %bb + br label %bb20 + +bb20: ; preds = %bb19 + indirectbr i8* null, [label %bb4, label %bb13, label %bb18] +}