diff --git a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp --- a/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -20,6 +20,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" @@ -49,6 +50,10 @@ "bonus-inst-threshold", cl::Hidden, cl::init(1), cl::desc("Control the number of bonus instructions (default = 1)")); +static cl::opt TailMergeBBsEndingInUnreachable( + "tail-merge-bbs-ending-in-unreachable", cl::Hidden, cl::init(false), + cl::desc("Whether to tail merge basic blocks ending in unreachable")); + static cl::opt UserKeepLoops( "keep-loops", cl::Hidden, cl::init(true), cl::desc("Preserve canonical loop structure (default = true)")); @@ -143,6 +148,278 @@ return Changed; } +/// Advance 'I' to the next non-debug instruction. Stop before the end. +static void iterateNextNonDbgInstr(BasicBlock::iterator &I, + BasicBlock::iterator E) { + do { + ++I; + } while (I != E && isa(&*I)); +} + +static bool isConstantOrPhiOfConstants(const Value *V, const BasicBlock *BB) { + if (isa(V)) + return true; + if (auto *P = dyn_cast(V)) { + if (P->getParent() == BB) + return all_of(P->operands(), [](Value *O) { return isa(O); }); + } + return false; +} + +static uint64_t computeBlockHash(BasicBlock &BB) { + hash_code Hash(0); + Instruction *Start = BB.getFirstNonPHIOrDbg(); + for (auto I = Start->getIterator(), E = BB.end(); I != E; + iterateNextNonDbgInstr(I, E)) { + // An instruction's identity is its opcode and its non-constant operands. + // FIXME: This could be more precise if the operand is a value inside the + // block we are considering merging. + SmallVector ValueOperands; + for (Value *O : I->operands()) { + if (!isConstantOrPhiOfConstants(O, &BB)) + ValueOperands.push_back(O); + } + + // Add constant call targets to the identity. We don't want to make indirect + // calls by phi-ing constant call targets. + if (auto *Call = dyn_cast(&*I)) { + if (Function *F = Call->getCalledFunction()) + ValueOperands.push_back(F); + } + + hash_code OperandHash = + hash_combine_range(ValueOperands.begin(), ValueOperands.end()); + Hash = hash_combine(Hash, I->getOpcode(), OperandHash); + } + return Hash; +} + +/// Merge together two noreturn blocks that are almost identical. Allow constant +/// operands and phis of constant operands to differ. +static bool +tailMergeNoReturnBlocksIfProfitable(BasicBlock *BB1, BasicBlock *BB2, + SmallPtrSetImpl &BB1Preds, + unsigned PhiSizeEstimate) { + // Do one pass to check if this transformation is possible. Skip over phi + // nodes. They will be checked when they are used within the block, or they + // must be dead because noreturn blocks don't have any successors. + auto I1 = BB1->getFirstNonPHIOrDbg()->getIterator(); + auto E1 = BB1->end(); + auto I2 = BB2->getFirstNonPHIOrDbg()->getIterator(); + auto E2 = BB2->end(); + for (; I1 != E1 && I2 != E2; + iterateNextNonDbgInstr(I1, E1), iterateNextNonDbgInstr(I2, E2)) { + if (I1->isIdenticalTo(&*I2)) + continue; + if (!I1->isSameOperationAs(&*I2)) { + LLVM_DEBUG(dbgs() << "noreturn tail merge of " << BB1->getName() + << " and " << BB2->getName() + << " failed due to differing instructions:\n" + << *I1 << '\n' + << *I2 << '\n'); + return false; + } + + // Operands must be identical, or both be constants. Op1 may be a phi of + // constants, since we are tail merging many blocks together into it. + auto O2I = I2->op_begin(); + for (Value *Op1 : I1->operands()) { + Value *Op2 = *O2I; + if (!(Op1 == Op2 || + (isConstantOrPhiOfConstants(Op1, BB1) && + isConstantOrPhiOfConstants(Op2, BB2)))) { + LLVM_DEBUG(dbgs() << "noreturn tail merge of " << BB1->getName() + << " and " << BB2->getName() + << " failed due to differing operands:\n" + << *Op1 << '\n' + << *Op2 << '\n'); + return false; + } + ++O2I; + } + } + + // If either iterator didn't reach the end, we can't do the transform. + if (I1 != E1 || I2 != E2) { + LLVM_DEBUG(dbgs() << "noreturn tail merge of " << BB1->getName() << " and " + << BB2->getName() + << " failed due to differing block length\n"); + return false; + } + + LLVM_DEBUG(dbgs() << "Tail merging " << BB1->getName() << " with " + << BB2->getName() << '\n'); + + // Delete dead phis in BB1. If left behind, they would become invalid after + // updating the CFG. All live phis will be updated when we process their uses + // in this block. + for (I1 = BB1->begin(); I1 != E1;) { + if (auto *Phi = dyn_cast(&*I1++)) { + if (Phi->use_empty()) + Phi->eraseFromParent(); + } else { + break; + } + } + + // If BB1 and BB2 share predecessors and a phi node is required, BB2 cannot be + // completely replaced with BB1. Instead, BB2 will be an unconditional branch + // to BB1. + bool MustUseBranch = false; + for (BasicBlock *Pred : predecessors(BB2)) { + if (BB1Preds.count(Pred)) + MustUseBranch = true; + } + + // In a second pass, add phis for constant operands that differ and remove + // llvm.dbg.value intrinsics. + I1 = BB1->getFirstNonPHI()->getIterator(); + I2 = BB2->getFirstNonPHIOrDbg()->getIterator(); + for (; I1 != E1 && I2 != E2; ++I1, iterateNextNonDbgInstr(I2, E2)) { + // FIXME: Maybe use dbg.value undef instead? + while (isa(&*I1) && I1 != E1) + (I1++)->eraseFromParent(); + assert(I1->isSameOperationAs(&*I2)); + + // Use a merged source location. + I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + + auto O2I = I2->op_begin(); + for (Use &U : I1->operands()) { + Value *Op1 = U; + Value *Op2 = *O2I; + if (Op1 != Op2) { + assert(isConstantOrPhiOfConstants(Op1, BB1) && + isConstantOrPhiOfConstants(Op2, BB2)); + + // We have differing constant operands. Get or create a phi in BB1. + PHINode *Phi = dyn_cast(Op1); + if (!Phi) { + // Make a new phi between constants. + Constant *C1 = cast(Op1); + Phi = PHINode::Create(C1->getType(), std::min(12U, PhiSizeEstimate), + "noreturntail", &*BB1->begin()); + for (BasicBlock *Pred : predecessors(BB1)) + Phi->addIncoming(C1, Pred); + U.set(Phi); + } + + // Add phi entries to BB1. If we must use a branch, add one phi entry. + // Otherwise, add a phi entry for every predecessor of BB2. If BB2 has a + // phi, we transfer its entries directly. If not, Op2 must be a Constant + // and it can be used in the entry. + if (MustUseBranch) { + Phi->addIncoming(Op2, BB2); + } else { + if (PHINode *Phi2 = dyn_cast(Op2)) { + for (int I = 0, E = Phi2->getNumIncomingValues(); I != E; ++I) + Phi->addIncoming(Phi2->getIncomingValue(I), + Phi2->getIncomingBlock(I)); + } else { + for (BasicBlock *Pred : predecessors(BB2)) + Phi->addIncoming(Op2, Pred); + } + } + } + ++O2I; + } + } + + if (MustUseBranch) { + // Clear out non-phis in BB2 and insert a branch to BB1. + for (I2 = BB2->getFirstNonPHI()->getIterator(); I2 != E2; ) + I2 = I2->eraseFromParent(); + BranchInst::Create(BB1, BB2); + BB1Preds.insert(BB2); + } else { + // BB2 can safely be replaced with BB1. RAUW it and delete it. All phi nodes + // must be of constants, and should have been handled above. + for (BasicBlock *Pred : predecessors(BB2)) + BB1Preds.insert(Pred); + BB2->replaceAllUsesWith(BB1); + BB2->dropAllReferences(); + BB2->eraseFromParent(); + } + + return true; +} + +/// Merge together noreturn blocks with common code. A noreturn block is a block +/// that ends in unreachable preceded by an instruction that could transfer +/// control to a caller or end the program, such as longjmp, __assert_fail, +/// __cxa_throw, or a volatile store to null. Such blocks have no successors but +/// do not return or resume exception handling. +/// +/// The goal is to canonicalize this kind of C code to the same thing: +/// if (c1) +/// abort(); +/// else if (c2) +/// abort(); +/// -> +/// if (c1 || c2) +/// abort(); +/// +/// TODO: Consider tail merging partial blocks: +/// bb1: +/// call void @foo() +/// call void @abort() +/// unreachable +/// bb2: +/// call void @abort() +/// unreachable +static bool tailMergeBBsEndingInUnreachable(Function &F) { + // Collect all non-empty BBs ending in unreachable and hash them into buckets. + MapVector> Buckets; + for (BasicBlock &BB : F) { + if (isa(BB.getTerminator()) && + BB.getFirstNonPHIOrDbg() != BB.getTerminator()) { + uint64_t Hash = computeBlockHash(BB); + auto Insertion = Buckets.insert({Hash, TinyPtrVector(&BB)}); + if (!Insertion.second) + Insertion.first->second.push_back(&BB); + } + } + + if (Buckets.empty()) + return false; + + bool Changed = false; + for (auto &HashBucket : Buckets) { + // Attempt to merge the first block in this bucket with every other block in + // the bucket. Remove the first block and all merged blocks from the bucket, + // and repeat the process until the bucket is empty. + TinyPtrVector &Bucket = HashBucket.second; + SmallPtrSet MergedBBs; + while (Bucket.size() > 1) { + LLVM_DEBUG({ + dbgs() << "tailMergeBBsEndingInUnreachable: considering"; + for (auto *BB : Bucket) + dbgs() << ' ' << BB->getName(); + dbgs() << '\n'; + }); + auto I = Bucket.begin(), E = Bucket.end(); + BasicBlock *CanonicalBlock = *I++; + MergedBBs.insert(CanonicalBlock); + SmallPtrSet Predecessors(pred_begin(CanonicalBlock), + pred_end(CanonicalBlock)); + for (; I != E; ++I) { + if (tailMergeNoReturnBlocksIfProfitable(CanonicalBlock, *I, + Predecessors, Bucket.size())) { + MergedBBs.insert(*I); + Changed = true; + } + } + + // Erase all merged BBs from the bucket. + erase_if(Bucket, + [&MergedBBs](BasicBlock *BB) { return MergedBBs.count(BB); }); + MergedBBs.clear(); + } + } + + return Changed; +} + /// Call SimplifyCFG on all the blocks in the function, /// iterating until no more changes are made. static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, @@ -175,6 +452,8 @@ const SimplifyCFGOptions &Options) { bool EverChanged = removeUnreachableBlocks(F); EverChanged |= mergeEmptyReturnBlocks(F); + if (TailMergeBBsEndingInUnreachable) + EverChanged |= tailMergeBBsEndingInUnreachable(F); EverChanged |= iterativelySimplifyCFG(F, TTI, Options); // If neither pass changed anything, we're done. diff --git a/llvm/test/CodeGen/Thumb2/setjmp_longjmp.ll b/llvm/test/CodeGen/Thumb2/setjmp_longjmp.ll --- a/llvm/test/CodeGen/Thumb2/setjmp_longjmp.ll +++ b/llvm/test/CodeGen/Thumb2/setjmp_longjmp.ll @@ -33,13 +33,7 @@ ; CHECK-NEXT: ldr [[DESTREG:r[0-9]+]], {{\[}}[[BUFREG]], #4] ; CHECK-NEXT: ldr r7, {{\[}}[[BUFREG]]{{\]}} ; CHECK-NEXT: bx [[DESTREG]] -; -; longjmp sequence2: -; CHECK: ldr [[TEMPREG:r[0-9]+]], [{{\s*}}[[BUFREG:r[0-9]+]], #8] -; CHECK-NEXT: mov sp, [[TEMPREG]] -; CHECK-NEXT: ldr [[DESTREG:r[0-9]+]], {{\[}}[[BUFREG]], #4] -; CHECK-NEXT: ldr r7, {{\[}}[[BUFREG]]{{\]}} -; CHECK-NEXT: bx [[DESTREG]] + define void @double_foobar() { entry: %buf = alloca [5 x i8*], align 4 @@ -61,7 +55,8 @@ br label %if.end if.else: - store volatile i32 0, i32* @g, align 4 + %p = phi i32 [ 0, %entry ], [ 1, %if.end ] + store volatile i32 %p, i32* @g, align 4 call void @llvm.eh.sjlj.longjmp(i8* %bufptr) unreachable @@ -73,16 +68,11 @@ %setjmpres2 = call i32 @llvm.eh.sjlj.setjmp(i8* %bufptr) %tobool2 = icmp ne i32 %setjmpres2, 0 - br i1 %tobool2, label %if2.then, label %if2.else + br i1 %tobool2, label %if2.then, label %if.else if2.then: store volatile i32 3, i32* @g, align 4 - br label %if2.end - -if2.else: - store volatile i32 2, i32* @g, align 4 - call void @llvm.eh.sjlj.longjmp(i8* %bufptr) - unreachable + br label %if.end if2.end: ret void diff --git a/llvm/test/CodeGen/Thumb2/thumb2-tbh.ll b/llvm/test/CodeGen/Thumb2/thumb2-tbh.ll --- a/llvm/test/CodeGen/Thumb2/thumb2-tbh.ll +++ b/llvm/test/CodeGen/Thumb2/thumb2-tbh.ll @@ -48,39 +48,39 @@ bb7.i: ; preds = %bb42.i call void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 0, i8* null) nounwind - unreachable + ret i32 0 bb15.i: ; preds = %bb42.i call void @_T_addtol(%struct._T_tstr** @_T_gtol, i32 2, i8* null) nounwind - unreachable + ret i32 0 bb23.i: ; preds = %bb42.i %1 = call i32 @strlen(i8* null) nounwind readonly ; [#uses=0] - unreachable + ret i32 0 bb33.i: ; preds = %bb42.i store i32 0, i32* @_C_nextcmd, align 4 %2 = call noalias i8* @calloc(i32 21, i32 1) nounwind ; [#uses=0] - unreachable + ret i32 0 bb34.i: ; preds = %bb42.i %3 = load i32, i32* @_C_nextcmd, align 4 ; [#uses=1] %4 = add i32 %3, 1 ; [#uses=1] store i32 %4, i32* @_C_nextcmd, align 4 %5 = call noalias i8* @calloc(i32 22, i32 1) nounwind ; [#uses=0] - unreachable + ret i32 0 bb35.i: ; preds = %bb42.i %6 = call noalias i8* @calloc(i32 20, i32 1) nounwind ; [#uses=0] - unreachable + ret i32 0 bb37.i: ; preds = %bb42.i %7 = call noalias i8* @calloc(i32 14, i32 1) nounwind ; [#uses=0] - unreachable + ret i32 0 bb39.i: ; preds = %bb42.i call void @Z_fatal(i8* getelementptr ([28 x i8], [28 x i8]* @.str31, i32 0, i32 0)) nounwind - unreachable + ret i32 0 bb40.i: ; preds = %bb42.i, %bb5.i, %bb1.i2 br label %bb42.i diff --git a/llvm/test/CodeGen/X86/tail-dup-assert.ll b/llvm/test/CodeGen/X86/tail-dup-assert.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/tail-dup-assert.ll @@ -0,0 +1,101 @@ +; RUN: llc -relocation-model=pic -mtriple=x86_64-linux < %s | FileCheck %s + +; Make sure tail duplication can undo simplifycfg noreturn tail merging when +; appropriate and leave it alone when not profitable. + +@.str = private unnamed_addr constant [6 x i8] c"x < y\00", align 1 +@.str.1 = private unnamed_addr constant [10 x i8] c"y - x > 7\00", align 1 +@.str.2 = private unnamed_addr constant [11 x i8] c"y - x < 40\00", align 1 + +@__file__ = private unnamed_addr constant [6 x i8] c"t.cpp\00", align 1 +@__pretty_function__ = private unnamed_addr constant [35 x i8] c"void f(unsigned int, unsigned int)\00", align 1 + +declare void @force_stack_setup() + +; Function Attrs: noreturn +declare void @__assert_fail_2(i8*, i32) + +define void @duplicate_assert_2(i32 %x, i32 %y) { +entry: + tail call void @force_stack_setup() + %cmp = icmp ugt i32 %y, %x + br i1 %cmp, label %cond.end, label %cond.false + +cond.end: ; preds = %entry + %sub = sub i32 %y, %x + %cmp1 = icmp ugt i32 %sub, 7 + br i1 %cmp1, label %cond.end4, label %cond.false + +cond.end4: ; preds = %cond.end + %cmp6 = icmp ult i32 %sub, 40 + br i1 %cmp6, label %cond.end9, label %cond.false + +cond.end9: ; preds = %cond.end4 + ret void + +cond.false: ; preds = %cond.end4, %cond.end, %entry + %noreturntail10 = phi i32 [ 33, %entry ], [ 34, %cond.end ], [ 35, %cond.end4 ] + %noreturntail = phi i8* [ getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), %entry ], [ getelementptr inbounds ([10 x i8], [10 x i8]* @.str.1, i64 0, i64 0), %cond.end ], [ getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), %cond.end4 ] + tail call void @__assert_fail_2(i8* nonnull %noreturntail, i32 %noreturntail10) + unreachable +} + +; CHECK-LABEL: duplicate_assert_2: +; CHECK: retq +; CHECK: .LBB0_{{.*}}: +; CHECK: leaq .L.str(%rip), %rdi +; CHECK: movl $33, %esi +; CHECK: callq __assert_fail_2 +; CHECK: .LBB0_{{.*}}: +; CHECK: leaq .L.str.1(%rip), %rdi +; CHECK: movl $34, %esi +; CHECK: callq __assert_fail_2 +; CHECK: .LBB0_{{.*}}: +; CHECK: leaq .L.str.2(%rip), %rdi +; CHECK: movl $35, %esi +; CHECK: callq __assert_fail_2 + +declare void @__assert_fail_4(i8*, i8*, i32, i8*) + +define void @leave_assert_4(i32 %x, i32 %y) { +entry: + tail call void @force_stack_setup() + %cmp = icmp ugt i32 %y, %x + br i1 %cmp, label %cond.end, label %cond.false + +cond.end: ; preds = %entry + %sub = sub i32 %y, %x + %cmp1 = icmp ugt i32 %sub, 7 + br i1 %cmp1, label %cond.end4, label %cond.false + +cond.end4: ; preds = %cond.end + %cmp6 = icmp ult i32 %sub, 40 + br i1 %cmp6, label %cond.end9, label %cond.false + +cond.end9: ; preds = %cond.end4 + ret void + +cond.false: ; preds = %cond.end4, %cond.end, %entry + %noreturntail10 = phi i32 [ 33, %entry ], [ 34, %cond.end ], [ 35, %cond.end4 ] + %noreturntail = phi i8* [ getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), %entry ], [ getelementptr inbounds ([10 x i8], [10 x i8]* @.str.1, i64 0, i64 0), %cond.end ], [ getelementptr inbounds ([11 x i8], [11 x i8]* @.str.2, i64 0, i64 0), %cond.end4 ] + tail call void @__assert_fail_4(i8* %noreturntail, i8* getelementptr inbounds ([6 x i8], [6 x i8]* @__file__, i64 0, i64 0), i32 %noreturntail10, i8* getelementptr inbounds ([35 x i8], [35 x i8]* @__pretty_function__, i64 0, i64 0)) + unreachable +} + +; CHECK-LABEL: leave_assert_4: +; CHECK: retq +; CHECK: .LBB1_{{.*}}: +; CHECK: leaq .L.str(%rip), %rdi +; CHECK: movl $33, %edx +; CHECK: jmp [[fail_bb:\.LBB1_[0-9]+]] +; CHECK: .LBB1_{{.*}}: +; CHECK: leaq .L.str.1(%rip), %rdi +; CHECK: movl $34, %edx +; CHECK: jmp [[fail_bb:\.LBB1_[0-9]+]] +; CHECK: .LBB1_{{.*}}: +; CHECK: leaq .L.str.2(%rip), %rdi +; CHECK: movl $35, %edx +; CHECK: [[fail_bb]]: +; CHECK: leaq .L__file__(%rip), %rsi +; CHECK: leaq .L__pretty_function__(%rip), %rcx +; CHECK: callq __assert_fail_4@PLT diff --git a/llvm/test/Transforms/SimplifyCFG/implied-cond.ll b/llvm/test/Transforms/SimplifyCFG/implied-cond.ll --- a/llvm/test/Transforms/SimplifyCFG/implied-cond.ll +++ b/llvm/test/Transforms/SimplifyCFG/implied-cond.ll @@ -19,11 +19,11 @@ ret void out_of_bounds: - call void @foo(i64 0) + call void @foo() unreachable out_of_bounds2: - call void @foo(i64 1) + call void @bar() unreachable } @@ -45,11 +45,11 @@ ret void out_of_bounds: - call void @foo(i64 0) + call void @foo() unreachable out_of_bounds2: - call void @foo(i64 1) + call void @bar() unreachable } @@ -69,13 +69,14 @@ ret void out_of_bounds: - call void @foo(i64 0) + call void @foo() unreachable out_of_bounds2: - call void @foo(i64 1) + call void @bar() unreachable } -declare void @foo(i64) +declare void @foo() +declare void @bar() diff --git a/llvm/test/Transforms/SimplifyCFG/tail-merge-assert.ll b/llvm/test/Transforms/SimplifyCFG/tail-merge-assert.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/tail-merge-assert.ll @@ -0,0 +1,61 @@ +; RUN: opt -tail-merge-bbs-ending-in-unreachable -simplifycfg -S < %s | FileCheck %s + +; Test that we tail merge this kind of code with glibc-style assertion +; failure calls: +; #include +; void merge_glibc_asserts(unsigned x, unsigned y) { +; assert(x < y); +; assert(y - x > 7); +; assert(y - x < 40); +; } +; +; glibc's __assert_fail function takes four parameters, and it is profitable to +; phi two of them. + +@.str = private unnamed_addr constant [6 x i8] c"x < y\00", align 1 +@.str.1 = private unnamed_addr constant [6 x i8] c"t.cpp\00", align 1 +@__PRETTY_FUNCTION__._Z1fjj = private unnamed_addr constant [35 x i8] c"void f(unsigned int, unsigned int)\00", align 1 +@.str.2 = private unnamed_addr constant [10 x i8] c"y - x > 7\00", align 1 +@.str.3 = private unnamed_addr constant [11 x i8] c"y - x < 40\00", align 1 + +declare void @glibc_assert_fail(i8*, i8*, i32, i8*) + +define void @merge_glibc_asserts(i32 %x, i32 %y) { +entry: + %cmp = icmp ugt i32 %y, %x + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + tail call void @glibc_assert_fail(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i64 0, i64 0), i32 3, i8* getelementptr inbounds ([35 x i8], [35 x i8]* @__PRETTY_FUNCTION__._Z1fjj, i64 0, i64 0)) #2 + unreachable + +cond.end: ; preds = %entry + %sub = sub i32 %y, %x + %cmp1 = icmp ugt i32 %sub, 7 + br i1 %cmp1, label %cond.end4, label %cond.false3 + +cond.false3: ; preds = %cond.end + tail call void @glibc_assert_fail(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str.2, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i64 0, i64 0), i32 4, i8* getelementptr inbounds ([35 x i8], [35 x i8]* @__PRETTY_FUNCTION__._Z1fjj, i64 0, i64 0)) #2 + unreachable + +cond.end4: ; preds = %cond.end + %cmp6 = icmp ult i32 %sub, 40 + br i1 %cmp6, label %cond.end9, label %cond.false8 + +cond.false8: ; preds = %cond.end4 + tail call void @glibc_assert_fail(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str.3, i64 0, i64 0), i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i64 0, i64 0), i32 5, i8* getelementptr inbounds ([35 x i8], [35 x i8]* @__PRETTY_FUNCTION__._Z1fjj, i64 0, i64 0)) #2 + unreachable + +cond.end9: ; preds = %cond.end4 + ret void +} + +; CHECK-LABEL: define void @merge_glibc_asserts(i32 %x, i32 %y) +; CHECK: cond.false: +; CHECK: %[[phi_line:[^ ]*]] = phi i32 [ 3, %entry ], [ 4, %cond.end ], [ 5, %cond.end4 ] +; CHECK: %[[phi_expr:[^ ]*]] = phi i8* [ getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), %entry ], [ getelementptr inbounds ([10 x i8], [10 x i8]* @.str.2, i64 0, i64 0), %cond.end ], [ getelementptr inbounds ([11 x i8], [11 x i8]* @.str.3, i64 0, i64 0), %cond.end4 ] +; CHECK: tail call void @glibc_assert_fail( +; CHECK-SAME: i8* %[[phi_expr]], +; CHECK-SAME: i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str.1, i64 0, i64 0) +; CHECK-SAME: i32 %[[phi_line]], +; CHECK-SAME: i8* getelementptr inbounds ([35 x i8], [35 x i8]* @__PRETTY_FUNCTION__._Z1fjj, i64 0, i64 0)) diff --git a/llvm/test/Transforms/SimplifyCFG/tail-merge-noreturn.ll b/llvm/test/Transforms/SimplifyCFG/tail-merge-noreturn.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/tail-merge-noreturn.ll @@ -0,0 +1,426 @@ +; RUN: opt -tail-merge-bbs-ending-in-unreachable -simplifycfg -S < %s | FileCheck %s + +; Test that we tail merge noreturn call blocks and phi constants properly. + +declare void @abort() +declare void @assert_fail_1(i32) +declare void @assert_fail_1_alt(i32) + +define void @merge_simple() { + %c1 = call i1 @foo() + br i1 %c1, label %cont1, label %a1 +a1: + call void @assert_fail_1(i32 0) + unreachable +cont1: + %c2 = call i1 @foo() + br i1 %c2, label %cont2, label %a2 +a2: + call void @assert_fail_1(i32 0) + unreachable +cont2: + %c3 = call i1 @foo() + br i1 %c3, label %cont3, label %a3 +a3: + call void @assert_fail_1(i32 0) + unreachable +cont3: + ret void +} + +; CHECK-LABEL: define void @merge_simple() +; CHECK: br i1 %c1, label %cont1, label %a1 +; CHECK: a1: +; CHECK: call void @assert_fail_1(i32 0) +; CHECK: unreachable +; CHECK: br i1 %c2, label %cont2, label %a1 +; CHECK-NOT: assert_fail_1 +; CHECK: br i1 %c3, label %cont3, label %a1 +; CHECK-NOT: assert_fail_1 +; CHECK: ret void + +define void @phi_three_constants() { +entry: + %c1 = call i1 @foo() + br i1 %c1, label %cont1, label %a1 +a1: + call void @assert_fail_1(i32 0) + unreachable +cont1: + %c2 = call i1 @foo() + br i1 %c2, label %cont2, label %a2 +a2: + call void @assert_fail_1(i32 1) + unreachable +cont2: + %c3 = call i1 @foo() + br i1 %c3, label %cont3, label %a3 +a3: + call void @assert_fail_1(i32 2) + unreachable +cont3: + ret void +} + +; CHECK-LABEL: define void @phi_three_constants() +; CHECK: br i1 %c1, label %cont1, label %a1 +; CHECK: a1: +; CHECK: %[[p:[^ ]*]] = phi i32 [ 0, %entry ], [ 1, %cont1 ], [ 2, %cont2 ] +; CHECK: call void @assert_fail_1(i32 %[[p]]) +; CHECK: unreachable +; CHECK: br i1 %c2, label %cont2, label %a1 +; CHECK-NOT: assert_fail_1 +; CHECK: br i1 %c3, label %cont3, label %a1 +; CHECK-NOT: assert_fail_1 +; CHECK: ret void + + +define void @dont_phi_values(i32 %x, i32 %y) { + %c1 = call i1 @foo() + br i1 %c1, label %cont1, label %a1 +a1: + call void @assert_fail_1(i32 %x) + unreachable +cont1: + %c2 = call i1 @foo() + br i1 %c2, label %cont2, label %a2 +a2: + call void @assert_fail_1(i32 %y) + unreachable +cont2: + ret void +} + +; CHECK-LABEL: define void @dont_phi_values(i32 %x, i32 %y) +; CHECK: call void @assert_fail_1(i32 %x) +; CHECK: call void @assert_fail_1(i32 %y) +; CHECK: ret void + + +define void @dont_phi_callees() { + %c1 = call i1 @foo() + br i1 %c1, label %cont1, label %a1 +cont1: + %c2 = call i1 @foo() + br i1 %c2, label %cont2, label %a2 +cont2: + ret void +a1: + call void @assert_fail_1(i32 0) + unreachable +a2: + call void @assert_fail_1_alt(i32 0) + unreachable +} + +; CHECK-LABEL: define void @dont_phi_callees() +; CHECK: call void @assert_fail_1(i32 0) +; CHECK: unreachable +; CHECK: call void @assert_fail_1_alt(i32 0) +; CHECK: unreachable + + +declare i1 @foo() +declare i1 @bar() + +define void @unmergeable_phis(i32 %v, i1 %c) { +entry: + br i1 %c, label %s1, label %s2 +s1: + %c1 = call i1 @foo() + br i1 %c1, label %a1, label %a2 +s2: + %c2 = call i1 @bar() + br i1 %c2, label %a1, label %a2 +a1: + %l1 = phi i32 [ 0, %s1 ], [ 1, %s2 ] + call void @assert_fail_1(i32 %l1) + unreachable +a2: + %l2 = phi i32 [ 2, %s1 ], [ 3, %s2 ] + call void @assert_fail_1(i32 %l2) + unreachable +} + +; CHECK-LABEL: define void @unmergeable_phis(i32 %v, i1 %c) +; CHECK: a1: +; CHECK: %l1 = phi i32 [ 0, %s1 ], [ 1, %s2 ], [ %l2, %a2 ] +; CHECK: call void @assert_fail_1(i32 %l1) +; CHECK: unreachable +; CHECK: a2: +; CHECK: %l2 = phi i32 [ 2, %s1 ], [ 3, %s2 ] +; CHECK: br label %a1 + + +define void @tail_merge_switch(i32 %v) { +entry: + switch i32 %v, label %ret [ + i32 0, label %a1 + i32 13, label %a2 + i32 42, label %a3 + ] +a1: + call void @assert_fail_1(i32 0) + unreachable +a2: + call void @assert_fail_1(i32 1) + unreachable +a3: + call void @assert_fail_1(i32 2) + unreachable +ret: + ret void +} + +; CHECK-LABEL: define void @tail_merge_switch(i32 %v) +; CHECK: a1: +; CHECK: %[[p:[^ ]*]] = phi i32 [ 0, %entry ], [ 1, %a2 ], [ 2, %a3 ] +; CHECK: call void @assert_fail_1(i32 %[[p]]) +; CHECK: unreachable +; CHECK: a2: +; CHECK: br label %a1 +; CHECK: a3: +; CHECK: br label %a1 + + +define void @need_to_add_bb2_preds(i1 %c1) { +bb1: + br i1 %c1, label %bb2, label %a1 +bb2: + %c2 = call i1 @bar() + br i1 %c2, label %a2, label %a3 + +a1: + call void @assert_fail_1(i32 0) + unreachable +a2: + call void @assert_fail_1(i32 1) + unreachable +a3: + call void @assert_fail_1(i32 2) + unreachable +} + +; CHECK-LABEL: define void @need_to_add_bb2_preds(i1 %c1) +; CHECK: bb1: +; CHECK: br i1 %c1, label %bb2, label %a1 +; CHECK: bb2: +; CHECK: %c2 = call i1 @bar() +; CHECK: %[[sel:[^ ]*]] = select i1 %c2, i32 1, i32 2 +; CHECK: br label %a1 +; CHECK: a1: +; CHECK: %[[p:[^ ]*]] = phi i32 [ 0, %bb1 ], [ %[[sel]], %bb2 ] +; CHECK: call void @assert_fail_1(i32 %[[p]]) +; CHECK: unreachable + + +define void @phi_in_bb2() { +entry: + %c1 = call i1 @foo() + br i1 %c1, label %cont1, label %a1 +a1: + call void @assert_fail_1(i32 0) + unreachable +cont1: + %c2 = call i1 @foo() + br i1 %c2, label %cont2, label %a2 +a2: + %p2 = phi i32 [ 1, %cont1 ], [ 2, %cont2 ] + call void @assert_fail_1(i32 %p2) + unreachable +cont2: + %c3 = call i1 @foo() + br i1 %c3, label %cont3, label %a2 +cont3: + ret void +} + +; CHECK-LABEL: define void @phi_in_bb2() +; CHECK: a1: +; CHECK: %[[p:[^ ]*]] = phi i32 [ 0, %entry ], [ 1, %cont1 ], [ 2, %cont2 ] +; CHECK: call void @assert_fail_1(i32 %[[p]]) +; CHECK: unreachable +; CHECK: cont3: +; CHECK: ret void + + +; Don't tail merge these noreturn blocks using lifetime end. It prevents us +; from sharing stack slots for x and y. + +declare void @escape_i32_ptr(i32*) +declare void @llvm.lifetime.start(i64, i8* nocapture) +declare void @llvm.lifetime.end(i64, i8* nocapture) + +define void @dont_merge_lifetimes(i32 %c1, i32 %c2) { +entry: + %x = alloca i32, align 4 + %y = alloca i32, align 4 + switch i32 %c1, label %if.end9 [ + i32 13, label %if.then + i32 42, label %if.then3 + ] + +if.then: ; preds = %entry + %0 = bitcast i32* %x to i8* + call void @llvm.lifetime.start(i64 4, i8* nonnull %0) + store i32 0, i32* %x, align 4 + %tobool = icmp eq i32 %c2, 0 + br i1 %tobool, label %if.end, label %if.then1 + +if.then1: ; preds = %if.then + call void @escape_i32_ptr(i32* nonnull %x) + br label %if.end + +if.end: ; preds = %if.then1, %if.then + call void @llvm.lifetime.end(i64 4, i8* nonnull %0) + call void @abort() + unreachable + +if.then3: ; preds = %entry + %1 = bitcast i32* %y to i8* + call void @llvm.lifetime.start(i64 4, i8* nonnull %1) + store i32 0, i32* %y, align 4 + %tobool5 = icmp eq i32 %c2, 0 + br i1 %tobool5, label %if.end7, label %if.then6 + +if.then6: ; preds = %if.then3 + call void @escape_i32_ptr(i32* nonnull %y) + br label %if.end7 + +if.end7: ; preds = %if.then6, %if.then3 + call void @llvm.lifetime.end(i64 4, i8* nonnull %1) + call void @abort() + unreachable + +if.end9: ; preds = %entry + ret void +} + +; CHECK-LABEL: define void @dont_merge_lifetimes(i32 %c1, i32 %c2) +; CHECK: call void @abort() +; CHECK: unreachable +; CHECK: call void @abort() +; CHECK: unreachable + + +; Dead phis in the block need to be handled. + +declare void @llvm.dbg.value(metadata, i64, metadata, metadata) + +define void @dead_phi() { +entry: + %c1 = call i1 @foo() + br i1 %c1, label %cont1, label %a1 +a1: + %dead = phi i32 [ 0, %entry ], [ 1, %cont1 ] + call void @assert_fail_1(i32 0) + unreachable +cont1: + %c2 = call i1 @foo() + br i1 %c2, label %cont2, label %a1 +cont2: + %c3 = call i1 @foo() + br i1 %c3, label %cont3, label %a3 +a3: + call void @assert_fail_1(i32 0) + unreachable +cont3: + ret void +} + +; CHECK-LABEL: define void @dead_phi() +; CHECK: a1: +; CHECK-NEXT: call void @assert_fail_1(i32 0) +; CHECK-NOT: @assert_fail_1 +; CHECK: ret void + +define void @strip_dbg_value(i32 %c) { +entry: + call void @llvm.dbg.value(metadata i32 %c, i64 0, metadata !12, metadata !13), !dbg !14 + switch i32 %c, label %sw.epilog [ + i32 13, label %sw.bb + i32 42, label %sw.bb1 + ] + +sw.bb: ; preds = %entry + call void @llvm.dbg.value(metadata i32 55, i64 0, metadata !12, metadata !13), !dbg !14 + tail call void @abort() + unreachable + +sw.bb1: ; preds = %entry + call void @llvm.dbg.value(metadata i32 67, i64 0, metadata !12, metadata !13), !dbg !14 + tail call void @abort() + unreachable + +sw.epilog: ; preds = %entry + ret void +} + +; CHECK-LABEL: define void @strip_dbg_value(i32 %c) +; CHECK: entry: +; CHECK: call void @llvm.dbg.value(metadata i32 %c, {{.*}}) +; CHECK: switch i32 %c, label %sw.epilog [ +; CHECK: i32 13, label %sw.bb +; CHECK: i32 42, label %sw.bb +; CHECK: ] +; CHECK: sw.bb: ; preds = %entry +; CHECK-NOT: llvm.dbg.value +; CHECK: tail call void @abort() +; CHECK: unreachable + +define void @dead_phi_and_dbg(i32 %c) { +entry: + call void @llvm.dbg.value(metadata i32 %c, i64 0, metadata !12, metadata !13), !dbg !14 + switch i32 %c, label %sw.epilog [ + i32 13, label %sw.bb + i32 42, label %sw.bb1 + i32 53, label %sw.bb2 + ] + +sw.bb: ; preds = %entry + %c.1 = phi i32 [ 55, %entry], [ 67, %sw.bb1 ] + call void @llvm.dbg.value(metadata i32 %c.1, i64 0, metadata !12, metadata !13), !dbg !14 + tail call void @abort() + unreachable + +sw.bb1: + br label %sw.bb + +sw.bb2: ; preds = %entry + call void @llvm.dbg.value(metadata i32 84, i64 0, metadata !12, metadata !13), !dbg !14 + tail call void @abort() + unreachable + +sw.epilog: ; preds = %entry + ret void +} + +; CHECK-LABEL: define void @dead_phi_and_dbg(i32 %c) +; CHECK: entry: +; CHECK: call void @llvm.dbg.value(metadata i32 %c, {{.*}}) +; CHECK: switch i32 %c, label %sw.epilog [ +; CHECK: i32 13, label %sw.bb +; CHECK: i32 42, label %sw.bb +; CHECK: i32 53, label %sw.bb +; CHECK: ] +; CHECK: sw.bb: ; preds = %entry +; CHECK-NOT: llvm.dbg.value +; CHECK: tail call void @abort() +; CHECK: unreachable +; CHECK-NOT: call void @abort() +; CHECK: ret void + + + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, runtimeVersion: 0, emissionKind: FullDebug) +!1 = !DIFile(filename: "t.c", directory: "asdf") +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"PIC Level", i32 2} +!7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 2, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!12 = !DILocalVariable(name: "c", scope: !7) +!13 = !DIExpression() +!14 = !DILocation(line: 2, column: 12, scope: !7)