diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2102,6 +2102,7 @@ return false; PHINode *PN = nullptr; + ExtractValueInst *EVI = nullptr; BitCastInst *BCI = nullptr; Value *V = RetI->getReturnValue(); if (V) { @@ -2109,6 +2110,14 @@ if (BCI) V = BCI->getOperand(0); + EVI = dyn_cast(V); + if (EVI) { + V = EVI->getOperand(0); + if (!std::all_of(EVI->idx_begin(), EVI->idx_end(), + [](unsigned idx) { return idx == 0; })) + return false; + } + PN = dyn_cast(V); if (!PN) return false; @@ -2122,7 +2131,9 @@ if (PN) { BasicBlock::iterator BI = BB->begin(); // Skip over debug and the bitcast. - do { ++BI; } while (isa(BI) || &*BI == BCI); + do { + ++BI; + } while (isa(BI) || &*BI == BCI || &*BI == EVI); if (&*BI != RetI) return false; } else { diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -902,9 +902,25 @@ Pred->getInstList().insert(NewRet->getIterator(), NewBC); *i = NewBC; } + + Instruction *NewEV = nullptr; + if (ExtractValueInst *EVI = dyn_cast(V)) { + V = EVI->getOperand(0); + NewEV = EVI->clone(); + if (NewBC) { + NewBC->setOperand(0, NewEV); + Pred->getInstList().insert(NewBC->getIterator(), NewEV); + } else { + Pred->getInstList().insert(NewRet->getIterator(), NewEV); + *i = NewEV; + } + } + if (PHINode *PN = dyn_cast(V)) { if (PN->getParent() == BB) { - if (NewBC) + if (NewEV) { + NewEV->setOperand(0, PN->getIncomingValueForBlock(Pred)); + } else if (NewBC) NewBC->setOperand(0, PN->getIncomingValueForBlock(Pred)); else *i = PN->getIncomingValueForBlock(Pred); diff --git a/llvm/test/CodeGen/X86/tailcall-extract.ll b/llvm/test/CodeGen/X86/tailcall-extract.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/tailcall-extract.ll @@ -0,0 +1,190 @@ +; RUN: llc 
-mtriple=x86_64-linux < %s | FileCheck %s +; RUN: opt -codegenprepare -S -mtriple=x86_64-linux < %s | FileCheck %s --check-prefix OPT + + +; The exit block containing extractvalue can be duplicated into the BB +; containing the call, so that a tail call can be generated later. + +; CHECK-LABEL: test1: +; CHECK: jmp bar # TAILCALL +; CHECK: jmp foo # TAILCALL + +; OPT-LABEL: test1 +; OPT: if.then.i: +; OPT-NEXT: tail call { i8*, i64 } @bar +; OPT-NEXT: extractvalue +; OPT-NEXT: bitcast +; OPT-NEXT: ret +; +; OPT: if.end.i: +; OPT-NEXT: tail call { i8*, i64 } @foo +; OPT-NEXT: extractvalue +; OPT-NEXT: bitcast +; OPT-NEXT: ret + +define i64* @test1(i64 %size) { +entry: + %cmp.i.i = icmp ugt i64 %size, 16384 + %add.i.i = add i64 %size, 7 + %div.i.i = lshr i64 %add.i.i, 3 + %phitmp.i.i = trunc i64 %div.i.i to i32 + %cmp1.i = icmp eq i32 %phitmp.i.i, 0 + %cmp.i = or i1 %cmp.i.i, %cmp1.i + br i1 %cmp.i, label %if.end.i, label %if.then.i + if.then.i: ; preds = %entry + %call1.i = tail call { i8*, i64 } @bar(i64 %size) + br label %exit + +if.end.i: ; preds = %entry + %call2.i = tail call { i8*, i64 } @foo(i64 %size) + br label %exit + +exit: ; preds = %if.end.i, %if.then.i + %call1.i.sink = phi { i8*, i64 } [ %call1.i, %if.then.i ], [ %call2.i, %if.end.i ] + %ev = extractvalue { i8*, i64 } %call1.i.sink, 0 + %result = bitcast i8* %ev to i64* + ret i64* %result +} + + +; The extractvalue extracts a field with non-zero offset, so the exit block +; can't be duplicated. 
+ +; CHECK-LABEL: test2: +; CHECK: callq bar +; CHECK: callq foo + +; OPT-LABEL: test2 +; OPT: if.then.i: +; OPT-NEXT: tail call { i8*, i64 } @bar +; OPT-NEXT: br label %exit +; +; OPT: if.end.i: +; OPT-NEXT: tail call { i8*, i64 } @foo +; OPT-NEXT: br label %exit +; +; OPT: exit: +; OPT-NEXT: phi +; OPT-NEXT: extractvalue +; OPT-NEXT: ret + +define i64 @test2(i64 %size) { +entry: + %cmp.i.i = icmp ugt i64 %size, 16384 + %add.i.i = add i64 %size, 7 + %div.i.i = lshr i64 %add.i.i, 3 + %phitmp.i.i = trunc i64 %div.i.i to i32 + %cmp1.i = icmp eq i32 %phitmp.i.i, 0 + %cmp.i = or i1 %cmp.i.i, %cmp1.i + br i1 %cmp.i, label %if.end.i, label %if.then.i + if.then.i: ; preds = %entry + %call1.i = tail call { i8*, i64 } @bar(i64 %size) + br label %exit + +if.end.i: ; preds = %entry + %call2.i = tail call { i8*, i64 } @foo(i64 %size) + br label %exit + +exit: ; preds = %if.end.i, %if.then.i + %call1.i.sink = phi { i8*, i64 } [ %call1.i, %if.then.i ], [ %call2.i, %if.end.i ] + %ev = extractvalue { i8*, i64 } %call1.i.sink, 1 + ret i64 %ev +} + + +; The extractvalue accesses a nested struct type; the extracted field has zero +; offset, so the exit block can still be duplicated, and a tail call generated. 
+ +; CHECK-LABEL: test3: +; CHECK: jmp baz # TAILCALL +; CHECK: jmp qux # TAILCALL + +; OPT-LABEL: test3 +; OPT: if.then.i: +; OPT-NEXT: tail call { { i8*, i64 }, i64 } @baz +; OPT-NEXT: extractvalue +; OPT-NEXT: bitcast +; OPT-NEXT: ret +; +; OPT: if.end.i: +; OPT-NEXT: tail call { { i8*, i64 }, i64 } @qux +; OPT-NEXT: extractvalue +; OPT-NEXT: bitcast +; OPT-NEXT: ret + +define i64* @test3(i64 %size) { +entry: + %cmp.i.i = icmp ugt i64 %size, 16384 + %add.i.i = add i64 %size, 7 + %div.i.i = lshr i64 %add.i.i, 3 + %phitmp.i.i = trunc i64 %div.i.i to i32 + %cmp1.i = icmp eq i32 %phitmp.i.i, 0 + %cmp.i = or i1 %cmp.i.i, %cmp1.i + br i1 %cmp.i, label %if.end.i, label %if.then.i + +if.then.i: ; preds = %entry + %call1.i = tail call { {i8*, i64}, i64 } @baz(i64 %size) + br label %exit + +if.end.i: ; preds = %entry + %call2.i = tail call { {i8*, i64}, i64 } @qux(i64 %size) + br label %exit + +exit: ; preds = %if.end.i, %if.then.i + %call1.i.sink = phi { {i8*, i64}, i64 } [ %call1.i, %if.then.i ], [ %call2.i, %if.end.i ] + %ev = extractvalue { {i8*, i64}, i64 } %call1.i.sink, 0, 0 + %result = bitcast i8* %ev to i64* + ret i64* %result +} + + +; The extractvalue accesses a nested struct with non-zero offset, so the exit +; block can't be duplicated. 
+ +; CHECK-LABEL: test4: +; CHECK: callq baz +; CHECK: callq qux + +; OPT-LABEL: test4 +; OPT: if.then.i: +; OPT-NEXT: tail call { { i8*, i64 }, i64 } @baz +; OPT-NEXT: br label %exit +; +; OPT: if.end.i: +; OPT-NEXT: tail call { { i8*, i64 }, i64 } @qux +; OPT-NEXT: br label %exit +; +; OPT: exit: +; OPT-NEXT: phi +; OPT-NEXT: extractvalue +; OPT-NEXT: ret + +define i64 @test4(i64 %size) { +entry: + %cmp.i.i = icmp ugt i64 %size, 16384 + %add.i.i = add i64 %size, 7 + %div.i.i = lshr i64 %add.i.i, 3 + %phitmp.i.i = trunc i64 %div.i.i to i32 + %cmp1.i = icmp eq i32 %phitmp.i.i, 0 + %cmp.i = or i1 %cmp.i.i, %cmp1.i + br i1 %cmp.i, label %if.end.i, label %if.then.i + +if.then.i: ; preds = %entry + %call1.i = tail call { {i8*, i64}, i64 } @baz(i64 %size) + br label %exit + +if.end.i: ; preds = %entry + %call2.i = tail call { {i8*, i64}, i64 } @qux(i64 %size) + br label %exit + +exit: ; preds = %if.end.i, %if.then.i + %call1.i.sink = phi { {i8*, i64}, i64 } [ %call1.i, %if.then.i ], [ %call2.i, %if.end.i ] + %ev = extractvalue { {i8*, i64}, i64 } %call1.i.sink, 0, 1 + ret i64 %ev +} + + +declare { i8*, i64 } @foo(i64) +declare { i8*, i64 } @bar(i64) +declare { {i8*, i64}, i64 } @baz(i64) +declare { {i8*, i64}, i64 } @qux(i64)