diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -495,6 +495,9 @@ if (!LargeOffsetGEPMap.empty()) MadeChange |= splitLargeGEPOffsets(); + if (MadeChange) + eliminateFallThrough(F); + // Really free removed instructions during promotion. for (Instruction *I : RemovedInsts) I->deleteValue(); @@ -1964,6 +1967,11 @@ if (II) { switch (II->getIntrinsicID()) { default: break; + case Intrinsic::assume: { + II->eraseFromParent(); + return true; + } + case Intrinsic::experimental_widenable_condition: { // Give up on future widening oppurtunties so that we can fold away dead // paths and merge blocks before going into block-local instruction diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll --- a/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/extend-sink-hoist.ll @@ -13,7 +13,6 @@ %s1 = sext i64 %l1 to i128 br label %block2 -; CHECK-LABEL: block2: ; CHECK-NEXT: sext ; CHECK-NEXT: load ; CHECK-NEXT: sext @@ -34,7 +33,6 @@ %l1 = load i32, i32* %mem1 br label %block2 -; CHECK-LABEL: block2: ; CHECK-NEXT: load ; CHECK-NEXT: sext block2: diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll b/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll --- a/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll +++ b/llvm/test/Transforms/CodeGenPrepare/X86/optimizeSelect-DT.ll @@ -15,9 +15,10 @@ ; CHECK-NEXT: br label [[SELECT_END]] ; CHECK: select.end: ; CHECK-NEXT: [[MUL:%.*]] = phi i32 [ [[REM]], [[SELECT_TRUE_SINK]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[NEG:%.*]] = add i32 [[T1:%.*]], -1 +; CHECK-NEXT: [[USUB:%.*]] = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[T1:%.*]], i32 1) +; CHECK-NEXT: [[NEG:%.*]] = extractvalue { i32, i1 } [[USUB]], 0 +; CHECK-NEXT: [[TOBOOL:%.*]] = extractvalue { i32, i1 } [[USUB]], 1 ; CHECK-NEXT: [[ADD:%.*]] = add i32 [[NEG]], [[MUL]] -; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[T1]], 0 ; CHECK-NEXT: ret i1 [[TOBOOL]] ; entry: diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll b/llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/tailcall-assume-xbb.ll @@ -0,0 +1,48 @@ +; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s + +; The ret instruction can be duplicated into BB case2 even though there is an +; intermediate BB exit1 and call to llvm.assume. + +@ptr = external global i8*, align 8 + +; CHECK: %ret1 = tail call i8* @qux() +; CHECK-NEXT: ret i8* %ret1 + +; CHECK: %ret2 = tail call i8* @bar() +; CHECK-NEXT: ret i8* %ret2 + +define i8* @foo(i64 %size, i64 %v1, i64 %v2) { +entry: + %cmp1 = icmp ult i64 %size, 1025 + br i1 %cmp1, label %if.end, label %case1 + +case1: + %ret1 = tail call i8* @qux() + br label %exit2 + +if.end: + %cmp2 = icmp ult i64 %v1, %v2 + br i1 %cmp2, label %case3, label %case2 + +case2: + %ret2 = tail call i8* @bar() + br label %exit1 + +case3: + %ret3 = load i8*, i8** @ptr, align 8 + br label %exit1 + +exit1: + %retval1 = phi i8* [ %ret2, %case2 ], [ %ret3, %case3 ] + %cmp3 = icmp ne i8* %retval1, null + tail call void @llvm.assume(i1 %cmp3) + br label %exit2 + +exit2: + %retval2 = phi i8* [ %ret1, %case1 ], [ %retval1, %exit1 ] + ret i8* %retval2 +} + +declare void @llvm.assume(i1) +declare i8* @qux() +declare i8* @bar()