Index: llvm/include/llvm/IR/Dominators.h
===================================================================
--- llvm/include/llvm/IR/Dominators.h
+++ llvm/include/llvm/IR/Dominators.h
@@ -202,6 +202,15 @@
   void viewGraph();
 };
 
+/// Edges and BBs hold a set of multiple points in the function.
+struct DominanceSources {
+  SmallVector<BasicBlockEdge, 4> Edges;
+  SmallPtrSet<const BasicBlock *, 8> BBs;
+  /// Returns true if all predecessors of BB are dominated by at least one
+  /// edge or block in Edges / BBs.
+  bool dominates(const BasicBlock *BB, DominatorTree &DT);
+};
+
 //===-------------------------------------
 // DominatorTree GraphTraits specializations so the DominatorTree can be
 // iterable by generic graph iterators.
Index: llvm/include/llvm/Transforms/Scalar/GVN.h
===================================================================
--- llvm/include/llvm/Transforms/Scalar/GVN.h
+++ llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -249,6 +249,20 @@
   // of BlockRPONumber prior to accessing the contents of BlockRPONumber.
   bool InvalidBlockRPONumbers = true;
 
+  // A mapping from a known equivalence to all the sources of dominance where
+  // it is valid.
+  struct ValuePair : std::pair<Value *, Value *> {
+    ValuePair(Value *LHS, Value *RHS) : std::pair<Value *, Value *>(LHS, RHS) {}
+    bool operator<(const ValuePair &Other) const {
+      return first != Other.first ? first < Other.first : second < Other.second;
+    }
+  };
+  std::map<ValuePair, DominanceSources> EquivalenceSources;
+
+  // Walk through EquivalenceSources and replace any users now found to be
+  // dominated.
+  bool inferCollectiveDominance(Function &F);
+
   using LoadDepVect = SmallVector<NonLocalDepResult, 64>;
   using AvailValInBlkVect = SmallVector<AvailableValueInBlock, 64>;
   using UnavailBlkVect = SmallVector<BasicBlock *, 64>;
@@ -342,6 +356,9 @@
   bool splitCriticalEdges();
   BasicBlock *splitCriticalEdges(BasicBlock *Pred, BasicBlock *Succ);
   bool replaceOperandsForInBlockEquality(Instruction *I) const;
+  bool replaceDominatedUsesWith(Value *From, Value *To,
+                                const BasicBlockEdge &Root);
+  bool replaceDominatedUsesWith(Value *From, Value *To, const BasicBlock *BB);
   bool propagateEquality(Value *LHS, Value *RHS, const BasicBlockEdge &Root,
                          bool DominatesByEdge);
   bool processFoldableCondBr(BranchInst *BI);
Index: llvm/include/llvm/Transforms/Utils/Local.h
===================================================================
--- llvm/include/llvm/Transforms/Utils/Local.h
+++ llvm/include/llvm/Transforms/Utils/Local.h
@@ -418,6 +418,13 @@
 unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
                                   const BasicBlock *BB);
 
+/// Replace each use of 'From' with 'To' if that use is dominated
+/// collectively by Sources. The blocks in F found to be dominated by Sources
+/// are inserted into Sources.BBs. Returns the number of replacements made.
+unsigned replaceDominatedUsesWith(Value *From, Value *To,
+                                  Function &F, DominatorTree &DT,
+                                  DominanceSources &Sources);
+
 /// Return true if this call calls a gc leaf function.
/// /// A leaf function is a function that does not safepoint the thread during its Index: llvm/lib/IR/Dominators.cpp =================================================================== --- llvm/lib/IR/Dominators.cpp +++ llvm/lib/IR/Dominators.cpp @@ -340,6 +340,29 @@ return dominates(BBE1, BBE2.getStart()); } +bool DominanceSources::dominates(const BasicBlock *BB, DominatorTree &DT) { + if (!BB->hasNPredecessorsOrMore(1)) + return false; + for (const BasicBlock *PredBB : predecessors(BB)) { + bool PredDominated = false; + for (const BasicBlockEdge &E : Edges) + if (DT.dominates(E, PredBB)) { + PredDominated = true; + break; + } + if (PredDominated) + continue; + for (const BasicBlock *SourceBB : BBs) + if (DT.dominates(SourceBB, PredBB)) { + PredDominated = true; + break; + } + if (!PredDominated) + return false; + } + return true; +} + //===----------------------------------------------------------------------===// // DominatorTreeAnalysis and related pass implementations //===----------------------------------------------------------------------===// Index: llvm/lib/Transforms/Scalar/GVN.cpp =================================================================== --- llvm/lib/Transforms/Scalar/GVN.cpp +++ llvm/lib/Transforms/Scalar/GVN.cpp @@ -1994,6 +1994,31 @@ return Changed; } +bool GVN::inferCollectiveDominance(Function &F) { + bool Changed = false; + for (auto &I : EquivalenceSources) { + DominanceSources &Sources = I.second; + if (Sources.Edges.size() + Sources.BBs.size() < 2) + continue; + Value *From = I.first.first; + Value *To = I.first.second; + Changed |= llvm::replaceDominatedUsesWith(From, To, F, *DT, Sources); + } + return Changed; +} + +bool GVN::replaceDominatedUsesWith(Value *From, Value *To, + const BasicBlockEdge &Root) { + EquivalenceSources[{From, To}].Edges.push_back(Root); + return llvm::replaceDominatedUsesWith(From, To, *DT, Root); +} + +bool GVN::replaceDominatedUsesWith(Value *From, Value *To, + const BasicBlock *BB) { + 
EquivalenceSources[{From, To}].BBs.insert(BB);
+  return llvm::replaceDominatedUsesWith(From, To, *DT, BB);
+}
+
 /// The given values are known to be equal in every block
 /// dominated by 'Root'. Exploit this, for example by replacing 'LHS' with
 /// 'RHS' everywhere in the scope. Returns whether a change was made.
@@ -2059,8 +2084,8 @@
     if (!LHS->hasOneUse()) {
       unsigned NumReplacements =
           DominatesByEdge
-              ? replaceDominatedUsesWith(LHS, RHS, *DT, Root)
-              : replaceDominatedUsesWith(LHS, RHS, *DT, Root.getStart());
+              ? replaceDominatedUsesWith(LHS, RHS, Root)
+              : replaceDominatedUsesWith(LHS, RHS, Root.getStart());
 
       Changed |= NumReplacements > 0;
       NumGVNEqProp += NumReplacements;
@@ -2122,9 +2147,8 @@
       if (NotCmp && isa<Instruction>(NotCmp)) {
         unsigned NumReplacements =
             DominatesByEdge
-                ? replaceDominatedUsesWith(NotCmp, NotVal, *DT, Root)
-                : replaceDominatedUsesWith(NotCmp, NotVal, *DT,
-                                           Root.getStart());
+                ? replaceDominatedUsesWith(NotCmp, NotVal, Root)
+                : replaceDominatedUsesWith(NotCmp, NotVal, Root.getStart());
         Changed |= NumReplacements > 0;
         NumGVNEqProp += NumReplacements;
         // Cached information for anything that uses NotCmp will be invalid.
@@ -2712,6 +2736,8 @@
   for (BasicBlock *BB : RPOT)
     Changed |= processBlock(BB);
 
+  Changed |= inferCollectiveDominance(F);
+
   return Changed;
 }
 
@@ -2722,6 +2748,7 @@
   TableAllocator.Reset();
   ICF->clear();
   InvalidBlockRPONumbers = true;
+  EquivalenceSources.clear();
 }
 
 /// Verify that the specified instruction does not occur in our
Index: llvm/lib/Transforms/Utils/Local.cpp
===================================================================
--- llvm/lib/Transforms/Utils/Local.cpp
+++ llvm/lib/Transforms/Utils/Local.cpp
@@ -2663,6 +2663,28 @@
   return ::replaceDominatedUsesWith(From, To, BB, ProperlyDominates);
 }
 
+unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
+                                        Function &F, DominatorTree &DT,
+                                        DominanceSources &Sources) {
+  // Accumulate dominance in Sources.
+  bool Change = true;
+  while (Change) {
+    Change = false;
+    for (BasicBlock &BB : F)
+      if (!Sources.BBs.count(&BB) && Sources.dominates(&BB, DT)) {
+        Sources.BBs.insert(&BB);
+        Change = true;
+      }
+  }
+
+  unsigned NumUsesPrior = From->getNumUses();
+  From->replaceUsesWithIf(To, [Sources](Use &U) {
+    BasicBlock *UBB = cast<Instruction>(U.getUser())->getParent();
+    return Sources.BBs.count(UBB);
+  });
+  return NumUsesPrior - From->getNumUses();
+}
+
 bool llvm::callsGCLeafFunction(const CallBase *Call,
                                const TargetLibraryInfo &TLI) {
   // Check if the function is specifically marked as a gc leaf function.
Index: llvm/test/Transforms/builtin_constant.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/builtin_constant.ll
@@ -0,0 +1,193 @@
+; RUN: opt < %s -O3 -S | FileCheck %s
+;
+; Check that the inline asm call has a constant 0 as argument. This can be
+; inferred from the function (which is derived from the Linux Kernel) where
+; nested __builtin_constant_p:s are used. Since the inline asm operand
+; constraint is "i", the argument must really be a constant 0, or else
+; compilation will fail. This test is run with '-O3' and not just '-gvn',
+; since it seems there is a newgvn which is supposed to replace gvn some day.
+ +@a = global i32 0 +@b = global i32 0 + +define void @fun0() { +; CHECK-LABEL: @fun0 +; CHECK: tail call void asm sideeffect "", "i"(i32 0) +entry: + %tmp = alloca i32, align 4 + %0 = load i32, i32* @a + %shl = shl i32 %0, 12 + store i32 %shl, i32* @b + %1 = load i32, i32* @b + %cmp = icmp sgt i32 %1, -129 + br i1 %cmp, label %land.lhs.true, label %land.end + +land.lhs.true: ; preds = %entry + %2 = load i32, i32* @b + %cmp1 = icmp slt i32 %2, 128 + br i1 %cmp1, label %land.rhs, label %land.end + +land.rhs: ; preds = %land.lhs.true + %3 = load i32, i32* @b + %4 = call i1 @llvm.is.constant.i32(i32 %3) + br label %land.end + +land.end: ; preds = %land.rhs, %land.lhs.true, %entry + %5 = phi i1 [ false, %land.lhs.true ], [ false, %entry ], [ %4, %land.rhs ] + %land.ext = zext i1 %5 to i32 + %6 = call i1 @llvm.is.constant.i32(i32 %land.ext) + br i1 %6, label %cond.true, label %cond.false + +cond.true: ; preds = %land.end + %7 = load i32, i32* @b + %cmp2 = icmp sgt i32 %7, -129 + br i1 %cmp2, label %land.lhs.true3, label %if.end + +land.lhs.true3: ; preds = %cond.true + %8 = load i32, i32* @b + %cmp4 = icmp slt i32 %8, 128 + br i1 %cmp4, label %land.lhs.true5, label %if.end + +land.lhs.true5: ; preds = %land.lhs.true3 + %9 = load i32, i32* @b + %10 = call i1 @llvm.is.constant.i32(i32 %9) + br i1 %10, label %if.then, label %if.end + +cond.false: ; preds = %land.end + %11 = load i32, i32* @b + %cmp6 = icmp sgt i32 %11, -129 + br i1 %cmp6, label %land.lhs.true7, label %land.end10 + +land.lhs.true7: ; preds = %cond.false + %12 = load i32, i32* @b + %cmp8 = icmp slt i32 %12, 128 + br i1 %cmp8, label %land.rhs9, label %land.end10 + +land.rhs9: ; preds = %land.lhs.true7 + %13 = load i32, i32* @b + %14 = call i1 @llvm.is.constant.i32(i32 %13) + br label %land.end10 + +land.end10: ; preds = %land.rhs9, %land.lhs.true7, %cond.false + %15 = phi i1 [ false, %land.lhs.true7 ], [ false, %cond.false ], [ %14, %land.rhs9 ] + %16 = zext i1 %15 to i64 + %cond = select i1 %15, i32 1, i32 
0 + store i32 %cond, i32* %tmp + %17 = load i32, i32* %tmp + %tobool = icmp ne i32 %17, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %land.end10, %land.lhs.true5 + %18 = load i32, i32* @b + call void asm sideeffect "", "i"(i32 %18) + br label %if.end + +if.end: ; preds = %if.then, %land.end10, %land.lhs.true5, %land.lhs.true3, %cond.true + ret void +} + +%struct.b = type { i64 } +@d = dso_local global i64 0, align 8 +@c = dso_local global %struct.b* null, align 8 +@.src = private unnamed_addr constant [22 x i8] c"tc_builtin_constant.c\00", align 2 +@0 = private unnamed_addr constant { i16, i16, [4 x i8] } { i16 -1, i16 0, [4 x i8] c"'b'\00" } +@1 = private unnamed_addr global { { [22 x i8]*, i32, i32 }, { i16, i16, [4 x i8] }*, i8, i8 } { { [22 x i8]*, i32, i32 } { [22 x i8]* @.src, i32 24, i32 22 }, { i16, i16, [4 x i8] }* @0, i8 1, i8 3 } + +define void @fun1() { +; CHECK-LABEL: @fun1 +; CHECK: tail call void asm "", "=*Q,i,*Q"(i64* %a, i64 0, i64* %a) +entry: + %f = alloca i64, align 8 + %tmp = alloca i32, align 4 + %0 = bitcast i64* %f to i8* + %1 = load i64, i64* @d, align 8 + %shl = shl i64 %1, 12 + store i64 %shl, i64* %f, align 8 + %2 = load i64, i64* %f, align 8 + %cmp = icmp sgt i64 %2, -129 + br i1 %cmp, label %land.lhs.true, label %land.end + +land.lhs.true: ; preds = %entry + %3 = load i64, i64* %f, align 8 + %cmp1 = icmp slt i64 %3, 128 + br i1 %cmp1, label %land.rhs, label %land.end + +land.rhs: ; preds = %land.lhs.true + %4 = load i64, i64* %f, align 8 + %5 = call i1 @llvm.is.constant.i64(i64 %4) + br label %land.end + +land.end: ; preds = %land.rhs, %land.lhs.true, %entry + %6 = phi i1 [ false, %land.lhs.true ], [ false, %entry ], [ %5, %land.rhs ] + %land.ext = zext i1 %6 to i32 + %7 = call i1 @llvm.is.constant.i32(i32 %land.ext) + br i1 %7, label %cond.true, label %cond.false + +cond.true: ; preds = %land.end + %8 = load i64, i64* %f, align 8 + %cmp2 = icmp sgt i64 %8, -129 + br i1 %cmp2, label %land.lhs.true3, label 
%if.end + +land.lhs.true3: ; preds = %cond.true + %9 = load i64, i64* %f, align 8 + %cmp4 = icmp slt i64 %9, 128 + br i1 %cmp4, label %land.lhs.true5, label %if.end + +land.lhs.true5: ; preds = %land.lhs.true3 + %10 = load i64, i64* %f, align 8 + %11 = call i1 @llvm.is.constant.i64(i64 %10) + br i1 %11, label %if.then, label %if.end + +cond.false: ; preds = %land.end + %12 = load i64, i64* %f, align 8 + %cmp6 = icmp sgt i64 %12, -129 + br i1 %cmp6, label %land.lhs.true7, label %land.end10 + +land.lhs.true7: ; preds = %cond.false + %13 = load i64, i64* %f, align 8 + %cmp8 = icmp slt i64 %13, 128 + br i1 %cmp8, label %land.rhs9, label %land.end10 + +land.rhs9: ; preds = %land.lhs.true7 + %14 = load i64, i64* %f, align 8 + %15 = call i1 @llvm.is.constant.i64(i64 %14) + br label %land.end10 + +land.end10: ; preds = %land.rhs9, %land.lhs.true7, %cond.false + %16 = phi i1 [ false, %land.lhs.true7 ], [ false, %cond.false ], [ %15, %land.rhs9 ] + %17 = zext i1 %16 to i64 + %cond = select i1 %16, i32 1, i32 0 + store i32 %cond, i32* %tmp, align 4 + %18 = load i32, i32* %tmp, align 4 + %tobool = icmp ne i32 %18, 0 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %land.end10, %land.lhs.true5 + %19 = load %struct.b*, %struct.b** @c, align 8 + %20 = bitcast %struct.b* %19 to i8*, !nosanitize !10 + %21 = call i64 @llvm.objectsize.i64.p0i8(i8* %20, i1 false, i1 false, i1 false), !nosanitize !10 + %22 = icmp uge i64 %21, 8, !nosanitize !10 + br i1 %22, label %cont, label %handler.type_mismatch, !nosanitize !10 + +handler.type_mismatch: ; preds = %if.then + %23 = ptrtoint %struct.b* %19 to i64, !nosanitize !10 + call void @__ubsan_handle_type_mismatch_v1(i8* bitcast ({ { [22 x i8]*, i32, i32 }, { i16, i16, [4 x i8] }*, i8, i8 }* @1 to i8*), i64 %23) #5, !nosanitize !10 + br label %cont, !nosanitize !10 + +cont: ; preds = %handler.type_mismatch, %if.then + %a = getelementptr inbounds %struct.b, %struct.b* %19, i32 0, i32 0 + %24 = load i64, i64* %f, align 8 + 
call void asm "", "=*Q,i,*Q"(i64* %a, i64 %24, i64* %a) + br label %if.end + +if.end: ; preds = %cont, %land.end10, %land.lhs.true5, %land.lhs.true3, %cond.true + %25 = bitcast i64* %f to i8* + ret void +} + +declare i1 @llvm.is.constant.i64(i64) +declare i1 @llvm.is.constant.i32(i32) + +declare i64 @llvm.objectsize.i64.p0i8(i8*, i1 immarg, i1 immarg, i1 immarg) +declare dso_local void @__ubsan_handle_type_mismatch_v1(i8*, i64) +!10 = !{}