diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstraintSystem.h" #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" @@ -26,11 +27,14 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Verifier.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/DebugCounter.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ValueMapper.h" #include #include @@ -48,6 +52,10 @@ MaxRows("constraint-elimination-max-rows", cl::init(500), cl::Hidden, cl::desc("Maximum number of rows to keep in constraint system")); +static cl::opt DumpReproducers( + "constraint-elimination-dump-repros", cl::init(false), cl::Hidden, + cl::desc("Dump IR to reproduce successful transformations.")); + static int64_t MaxConstraintValue = std::numeric_limits::max(); static int64_t MinSignedConstraintValue = std::numeric_limits::min(); @@ -767,7 +775,135 @@ FactOrCheck::getFact(DT.getNode(Br->getSuccessor(1)), CmpI, true)); } -static bool checkAndReplaceCondition(CmpInst *Cmp, ConstraintInfo &Info) { +struct ReproducerEntry { + CmpInst *Cond; + bool IsNot; + + ReproducerEntry(CmpInst *Cond, bool IsNot) : Cond(Cond), IsNot(IsNot) {} +}; + +/// Helper function to generate a reproducer function for simplifying \p Cond. +/// The reproducer function contains a series of @llvm.assume calls, one for +/// each condition in \p Stack. For each condition, the operand instruction are +/// cloned until we reach operands that have an entry in \p Value2Index. Those +/// will then be added as function arguments. +static void generateReproducer(CmpInst *Cond, Module *M, + ArrayRef Stack, + DenseMap &Value2Index, + DominatorTree &DT) { + if (!M) + return; + + LLVMContext &Ctx = Cond->getContext(); + + ValueToValueMapTy Old2New; + SmallVector Args; + SmallPtrSet Seen; + bool IsSigned = CmpInst::isSigned(Cond->getPredicate()); + auto CollectArguments = [&](CmpInst *Cond) { + if (!Cond || IsSigned != CmpInst::isSigned(Cond->getPredicate())) + return; + SmallVector WorkList; + WorkList.push_back(Cond); + while (!WorkList.empty()) { + Value *V = WorkList.pop_back_val(); + if (!Seen.insert(V).second) + continue; + if (Old2New.find(V) != Old2New.end()) + continue; + + if (isa(V) || isa(V)) + continue; + + auto *I = dyn_cast(V); + if (Value2Index.find(V) != Value2Index.end() || !I || + !isa(V)) { + Old2New[V] = V; + Args.push_back(V); + } else { + append_range(WorkList, I->operands()); + } + } + }; + + for (auto &Entry : Stack) + CollectArguments(Entry.Cond); + CollectArguments(Cond); + + SmallVector ParamTys; + for (auto *P : Args) + ParamTys.push_back(P->getType()); + + FunctionType *FTy = FunctionType::get(Cond->getType(), ParamTys, + /*isVarArg=*/false); + Function *F = Function::Create(FTy, Function::ExternalLinkage, + Cond->getModule()->getName() + + Cond->getFunction()->getName() + "repro", + M); + for (unsigned I = 0; I < Args.size(); ++I) + F->getArg(I)->setName(Args[I]->getName()); + + for (unsigned i = 0; i < Args.size(); i++) { + Old2New[Args[i]] = F->getArg(i); + } + + BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F); + IRBuilder<> Builder(Entry); + Builder.CreateRet(Builder.getTrue()); + Builder.SetInsertPoint(Entry->getTerminator()); + + auto CloneInstructions = [&](ArrayRef Ops) { + SmallVector WorkList(Ops); + SmallVector ToClone; + while (!WorkList.empty()) { + Value *V = WorkList.pop_back_val(); + if (Old2New.find(V) != Old2New.end()) + continue; + + auto *I = dyn_cast(V); + if (Value2Index.find(V) == Value2Index.end() && I) { + Old2New[V] = nullptr; + ToClone.push_back(I); + append_range(WorkList, I->operands()); + } + } + + sort(ToClone, + [&DT](Instruction *A, Instruction *B) { return DT.dominates(A, B); }); + for (Instruction *I : ToClone) { + Old2New[I] = I->clone(); + Old2New[I]->setName(I->getName()); + cast(Old2New[I])->insertBefore(&*Builder.GetInsertPoint()); + cast(Old2New[I])->dropUnknownNonDebugMetadata(); + cast(Old2New[I])->setDebugLoc({}); + } + }; + for (auto &Entry : Stack) { + if (!Entry.Cond || CmpInst::isSigned(Cond->getPredicate()) != + CmpInst::isSigned(Entry.Cond->getPredicate())) + continue; + + CmpInst::Predicate Pred = Entry.Cond->getPredicate(); + if (Entry.IsNot) + Pred = CmpInst::getInversePredicate(Pred); + + CloneInstructions({Entry.Cond->getOperand(0), Entry.Cond->getOperand(1)}); + + auto *Cmp = Builder.CreateICmp(Pred, Entry.Cond->getOperand(0), + Entry.Cond->getOperand(1)); + Builder.CreateAssumption(Cmp); + } + + CloneInstructions(Cond); + Entry->getTerminator()->setOperand(0, Cond); + remapInstructionsInBlocks({Entry}, Old2New); + + assert(!verifyFunction(*F, &dbgs())); +} + +static bool checkAndReplaceCondition( + CmpInst *Cmp, ConstraintInfo &Info, Module *ReproducerModule, + ArrayRef ReproducerCondStack, DominatorTree &DT) { LLVM_DEBUG(dbgs() << "Checking " << *Cmp << "\n"); CmpInst::Predicate Pred = Cmp->getPredicate(); @@ -797,6 +933,9 @@ if (!DebugCounter::shouldExecute(EliminatedCounter)) return false; + generateReproducer(Cmp, ReproducerModule, ReproducerCondStack, + Info.getValue2Index(R.IsSigned), DT); + LLVM_DEBUG({ dbgs() << "Condition " << *Cmp << " implied by dominating constraints\n"; dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned)); @@ -816,6 +955,9 @@ if (!DebugCounter::shouldExecute(EliminatedCounter)) return false; + generateReproducer(Cmp, ReproducerModule, ReproducerCondStack, + Info.getValue2Index(R.IsSigned), DT); + LLVM_DEBUG({ dbgs() << "Condition !" << *Cmp << " implied by dominating constraints\n"; dumpWithNames(CSToUse, Info.getValue2Index(R.IsSigned)); @@ -940,12 +1082,15 @@ return Changed; } -static bool eliminateConstraints(Function &F, DominatorTree &DT) { +static bool eliminateConstraints(Function &F, DominatorTree &DT, + OptimizationRemarkEmitter &ORE) { bool Changed = false; DT.updateDFSNumbers(); ConstraintInfo Info(F.getParent()->getDataLayout()); State S(DT); + std::unique_ptr ReproducerModule( + DumpReproducers ? new Module(F.getName(), F.getContext()) : nullptr); // First, collect conditions implied by branches and blocks with their // Dominator DFS in and out numbers. @@ -989,6 +1134,7 @@ // Finally, process ordered worklist and eliminate implied conditions. SmallVector DFSInStack; + SmallVector ReproducerCondStack; for (FactOrCheck &CB : S.WorkList) { // First, pop entries from the stack that are out-of-scope for CB. Remove // the corresponding entry from the constraint system. @@ -1014,6 +1160,8 @@ Mapping.erase(V); Info.popLastNVariables(E.IsSigned, E.ValuesToRelease.size()); DFSInStack.pop_back(); + if (ReproducerModule) + ReproducerCondStack.pop_back(); } LLVM_DEBUG({ @@ -1031,7 +1179,8 @@ if (auto *II = dyn_cast(CB.Inst)) { Changed |= tryToSimplifyOverflowMath(II, Info, ToRemove); } else if (auto *Cmp = dyn_cast(CB.Inst)) { - Changed |= checkAndReplaceCondition(Cmp, Info); + Changed |= checkAndReplaceCondition(Cmp, Info, ReproducerModule.get(), + ReproducerCondStack, S.DT); } continue; } @@ -1053,10 +1202,32 @@ Pred = CmpInst::getInversePredicate(Pred); Info.addFact(Pred, A, B, CB.NumIn, CB.NumOut, DFSInStack); + if (ReproducerModule && DFSInStack.size() > ReproducerCondStack.size()) + ReproducerCondStack.emplace_back(cast(Cmp), CB.Not); + Info.transferToOtherSystem(Pred, A, B, CB.NumIn, CB.NumOut, DFSInStack); + if (ReproducerModule && DFSInStack.size() > ReproducerCondStack.size()) { + // Add dummy entries to ReproducerCondStack to keep it in sync with + // DFSInStack. + for (unsigned I = 0, + E = (DFSInStack.size() - ReproducerCondStack.size()); + I < E; ++I) { + ReproducerCondStack.emplace_back(nullptr, false); + } + } } } + if (ReproducerModule && !ReproducerModule->functions().empty()) { + std::string S; + raw_string_ostream StringS(S); + ReproducerModule->print(StringS, nullptr); + StringS.flush(); + OptimizationRemark Rem(DEBUG_TYPE, "Reproducer", &F); + Rem << ore::NV("module") << S; + ORE.emit(Rem); + } + #ifndef NDEBUG unsigned SignedEntries = count_if(DFSInStack, [](const StackEntry &E) { return E.IsSigned; }); @@ -1074,7 +1245,8 @@ PreservedAnalyses ConstraintEliminationPass::run(Function &F, FunctionAnalysisManager &AM) { auto &DT = AM.getResult(F); - if (!eliminateConstraints(F, DT)) + auto &ORE = AM.getResult(F); + if (!eliminateConstraints(F, DT, ORE)) return PreservedAnalyses::all(); PreservedAnalyses PA; diff --git a/llvm/test/Transforms/ConstraintElimination/analysis-invalidation.ll b/llvm/test/Transforms/ConstraintElimination/analysis-invalidation.ll --- a/llvm/test/Transforms/ConstraintElimination/analysis-invalidation.ll +++ b/llvm/test/Transforms/ConstraintElimination/analysis-invalidation.ll @@ -11,6 +11,7 @@ ; CHECK-NEXT: Running analysis: TargetIRAnalysis on ssub_no_overflow_due_to_or_conds ; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on ssub_no_overflow_due_to_or_conds ; CHECK-NEXT: Running pass: ConstraintEliminationPass on ssub_no_overflow_due_to_or_conds +; CHECK-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on ssub_no_overflow_due_to_or_conds ; CHECK-NEXT: Invalidating analysis: DemandedBitsAnalysis on ssub_no_overflow_due_to_or_conds ; CHECK-NEXT: Running pass: RequireAnalysisPass ; CHECK-NEXT: Running analysis: DemandedBitsAnalysis on ssub_no_overflow_due_to_or_conds @@ -21,6 +22,7 @@ ; CHECK-NEXT: Running analysis: TargetIRAnalysis on uge_zext ; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on uge_zext ; CHECK-NEXT: Running pass: ConstraintEliminationPass on uge_zext +; CHECK-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis on uge_zext ; CHECK-NEXT: Invalidating analysis: DemandedBitsAnalysis on uge_zext ; CHECK-NEXT: Running pass: RequireAnalysisPass ; CHECK-NEXT: Running analysis: DemandedBitsAnalysis on uge_zext diff --git a/llvm/test/Transforms/ConstraintElimination/reproducer-remarks.ll b/llvm/test/Transforms/ConstraintElimination/reproducer-remarks.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/reproducer-remarks.ll @@ -0,0 +1,241 @@ +; RUN: opt -passes=constraint-elimination -constraint-elimination-dump-repros -pass-remarks=constraint-elimination %s 2>&1 | FileCheck %s + +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + +declare void @use(i1) + +declare void @llvm.assume(i1) + +define i1 @test_no_known_facts(ptr %dst) { +; CHECK: remark: :0:0: module; ModuleID = 'test_no_known_facts' +; CHECK-LABEL: define i1 @"{{.+}}test_no_known_factsrepro"(ptr %dst) +; CHECK-NEXT: entry: +; CHECK-NEXT: %dst.0 = getelementptr inbounds ptr, ptr %dst, i64 0 +; CHECK-NEXT: %upper = getelementptr inbounds ptr, ptr %dst, i64 2 +; CHECK-NEXT: %c = icmp ult ptr %dst.0, %upper +; CHECK-NEXT: ret i1 %c +; CHECK-NEXT: } +; +entry: + %dst.0 = getelementptr inbounds ptr, ptr %dst, i64 0 + %upper = getelementptr inbounds ptr, ptr %dst, i64 2 + %c = icmp ult i32* %dst.0, %upper + ret i1 %c +} + +define void @test_one_known_fact_true_branch(i8 %start, i8 %high) { +; CHECK: remark: :0:0: module; ModuleID = 'test_one_known_fact_true_branch' + +; CHECK-LABEL: define i1 @"{{.*}}test_one_known_fact_true_branchrepro"(i8 %high, i8 %start) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %add.ptr.i = add nuw i8 %start, 3 +; CHECK-NEXT: %0 = icmp ult i8 %add.ptr.i, %high +; CHECK-NEXT: call void @llvm.assume(i1 %0) +; CHECK-NEXT: %t.0 = icmp ult i8 %start, %high +; CHECK-NEXT: ret i1 %t.0 +; CHECK-NEXT: } +; +entry: + %add.ptr.i = add nuw i8 %start, 3 + %c.1 = icmp ult i8 %add.ptr.i, %high + br i1 %c.1, label %if.then, label %if.end + +if.then: + %t.0 = icmp ult i8 %start, %high + call void @use(i1 %t.0) + ret void + +if.end: + ret void +} + +define void @test_one_known_fact_false_branch(i8 %start, i8 %high) { +; CHECK: remark: :0:0: module; ModuleID = 'test_one_known_fact_false_branch' +; +; CHECK-LABEL:define i1 @"{{.*}}test_one_known_fact_false_branchrepro"(i8 %high, i8 %start) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %add.ptr.i = add nuw i8 %start, 3 +; CHECK-NEXT: %0 = icmp ult i8 %add.ptr.i, %high +; CHECK-NEXT: call void @llvm.assume(i1 %0) +; CHECK-NEXT: %t.0 = icmp ult i8 %start, %high +; CHECK-NEXT: ret i1 %t.0 +; CHECK-NEXT: } +; +entry: + %add.ptr.i = add nuw i8 %start, 3 + %c.1 = icmp uge i8 %add.ptr.i, %high + br i1 %c.1, label %if.then, label %if.end + +if.then: + ret void + +if.end: + %t.0 = icmp ult i8 %start, %high + call void @use(i1 %t.0) + ret void +} + +define void @test_multiple_known_facts_branches_1(i8 %a, i8 %b) { +; CHECK: remark: :0:0: module; ModuleID = 'test_multiple_known_facts_branches_1' + +; CHECK-LABEL: define i1 @"{{.*}}test_multiple_known_facts_branches_1repro"(i8 %a, i8 %b) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = icmp ugt i8 %a, 10 +; CHECK-NEXT: call void @llvm.assume(i1 %0) +; CHECK-NEXT: %1 = icmp ugt i8 %b, 10 +; CHECK-NEXT: call void @llvm.assume(i1 %1) +; CHECK-NEXT: %add = add nuw i8 %a, %b +; CHECK-NEXT: %t.0 = icmp ugt i8 %add, 20 +; CHECK-NEXT: ret i1 %t.0 +; CHECK-NEXT: } +; +entry: + %c.1 = icmp ugt i8 %a, 10 + br i1 %c.1, label %then.1, label %else.1 + +then.1: + %c.2 = icmp ugt i8 %b, 10 + br i1 %c.2, label %then.2, label %else.1 + +then.2: + %add = add nuw i8 %a, %b + %t.0 = icmp ugt i8 %add, 20 + call void @use(i1 %t.0) + ret void + +else.1: + ret void +} + +define void @test_multiple_known_facts_branches_2(i8 %a, i8 %b) { +; CHECK: remark: :0:0: module; ModuleID = 'test_multiple_known_facts_branches_2' +; +; CHECK-LABEL: define i1 @"{{.*}}test_multiple_known_facts_branches_2repro"(i8 %a, i8 %b) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = icmp ugt i8 %a, 10 +; CHECK-NEXT: call void @llvm.assume(i1 %0) +; CHECK-NEXT: %1 = icmp ugt i8 %b, 10 +; CHECK-NEXT: call void @llvm.assume(i1 %1) +; CHECK-NEXT: %add = add nuw i8 %a, %b +; CHECK-NEXT: %t.0 = icmp ugt i8 %add, 20 +; CHECK-NEXT: ret i1 %t.0 +; CHECK-NEXT: } +; +entry: + %c.1 = icmp ugt i8 %a, 10 + br i1 %c.1, label %then.1, label %exit + +then.1: + %c.2 = icmp ule i8 %b, 10 + br i1 %c.2, label %exit, label %else.2 + +else.2: + %add = add nuw i8 %a, %b + %t.0 = icmp ugt i8 %add, 20 + call void @use(i1 %t.0) + ret void + +exit: + ret void +} + +define void @test_assumes(i8 %a, i8 %b) { +; CHECK-LABEL: define i1 @"{{.*}}test_assumesrepro.2"(i8 %a, i8 %b) { +; CHECK-NEXT: entry: +; CHECK-NEXT: %0 = icmp ugt i8 %a, 10 +; CHECK-NEXT: call void @llvm.assume(i1 %0) +; CHECK-NEXT: %1 = icmp ugt i8 %b, 10 +; CHECK-NEXT: call void @llvm.assume(i1 %1) +; CHECK-NEXT: %add = add nuw i8 %a, %b +; CHECK-NEXT: %t.0 = icmp ult i8 %add, 20 +; CHECK-NEXT: ret i1 %t.0 +; CHECK-NEXT: } +; +entry: + %c.1 = icmp ugt i8 %a, 10 + call void @llvm.assume(i1 %c.1) + %c.2 = icmp ugt i8 %b, 10 + call void @llvm.assume(i1 %c.2) + %add = add nuw i8 %a, %b + %t.0 = icmp ult i8 %add, 20 + call void @use(i1 %t.0) + ret void +} + +declare void @noundef(ptr noundef) + +define i1 @test_inbounds_precondition(ptr %src, i32 %n, i32 %idx) { +entry: + %upper = getelementptr inbounds i32, ptr %src, i64 5 + %src.idx.4 = getelementptr i32, ptr %src, i64 4 + %cmp.upper.4 = icmp ule ptr %src.idx.4, %upper + br i1 %cmp.upper.4, label %then, label %else + +then: + ret i1 true + +else: + ret i1 false +} + +define i32 @test_branch(i32 %a) { +entry: + %c.1 = icmp ult i32 %a, 0 + br i1 %c.1, label %then, label %exit + +then: + %c.2 = icmp ugt i32 0, 0 + br label %exit + +exit: + ret i32 0 +} + +define i32 @test_invoke(i32 %a) personality ptr null { +entry: + %call = invoke ptr null(i64 0) + to label %cont unwind label %lpad + +cont: + %l = load i32, ptr %call, align 4 + %c.1 = icmp slt i32 %a, %l + br i1 %c.1, label %then, label %exit + +lpad: + %lp = landingpad { ptr, i32 } + catch ptr null + catch ptr null + ret i32 0 + +then: + %c.2 = icmp eq i32 0, 0 + br label %exit + +exit: + ret i32 0 +} + +define <2 x i1> @vector_cmp(<2 x ptr> %vec) { +; + %gep.1 = getelementptr inbounds i32, <2 x ptr> %vec, i64 1 + %t.1 = icmp ult <2 x ptr> %vec, %gep.1 + ret <2 x i1> %t.1 +} + +define i1 @shared_operand() { +entry: + %sub = sub i8 0, 0 + %sub.2 = sub nuw i8 %sub, 0 + %c.5 = icmp ult i8 %sub.2, %sub + ret i1 %c.5 +} + + +@glob = external global i32 + +define i1 @load_global() { +entry: + %l = load i32, ptr @glob, align 8 + %c = icmp ugt i32 %l, %l + ret i1 %c +} diff --git a/llvm/tools/opt-viewer/extract-reproducers.py b/llvm/tools/opt-viewer/extract-reproducers.py new file mode 100644 --- /dev/null +++ b/llvm/tools/opt-viewer/extract-reproducers.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +desc = '''''' + +import optrecord +import argparse + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description=desc) + parser.add_argument( + 'yaml_dirs_or_files', + nargs='+', + help='List of optimization record files or directories searched ' + 'for optimization record files.') + + args = parser.parse_args() + + print_progress = False + jobs = 1 + + files = optrecord.find_opt_files(*args.yaml_dirs_or_files) + if not files: + parser.error("No *.opt.yaml files found") + sys.exit(1) + + all_remarks, file_remarks, _ = optrecord.gather_results( + files, jobs, True) + + i = 0 + for r in all_remarks: + if r[1] != 'constraint-elimination' or r[2] != 'Reproducer': + continue + with open('reproducer{}.ll'.format(i), 'wt') as f: + f.write(r[7][1][0][1]) + i += 1