diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -734,9 +734,20 @@
                                SDValue Call, SDValue OrigChain) {
   SmallVector<SDValue, 8> Ops;
   SDValue Chain = OrigChain.getOperand(0);
-  if (Chain.getNode() == Load.getNode())
+  if (Chain.getNode() == Load.getNode()) {
+    // Multiple chains might branch out of Load; fix them up.
     Ops.push_back(Load.getOperand(0));
-  else {
+    for (auto UI = Load.getNode()->use_begin(), UE = Load.getNode()->use_end();
+         UI != UE;) {
+      auto VT = UI.getUse().getValueType();
+      auto *U = *UI++;
+      if (VT == MVT::Other) {
+        Ops.append(U->op_begin() + 1, U->op_end());
+        CurDAG->UpdateNodeOperands(U, Ops);
+        Ops.resize(1);
+      }
+    }
+  } else {
     assert(Chain.getOpcode() == ISD::TokenFactor &&
            "Unexpected chain operand");
     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
@@ -748,9 +759,9 @@
       CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops);
     Ops.clear();
     Ops.push_back(NewChain);
+    Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
+    CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
   }
-  Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end());
-  CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops);
   CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0),
                              Load.getOperand(1), Load.getOperand(2));
@@ -779,11 +790,18 @@
       LD->getExtensionType() != ISD::NON_EXTLOAD)
     return false;

-  // Now let's find the callseq_start.
-  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
-    if (!Chain.hasOneUse())
-      return false;
-    Chain = Chain.getOperand(0);
+  if (HasCallSeq) {
+    // Now let's find the callseq_start.
+    while (Chain.getOpcode() != ISD::CALLSEQ_START) {
+      if (!Chain.hasOneUse())
+        return false;
+      Chain = Chain.getOperand(0);
+    }
+  } else if (Chain.getOpcode() == ISD::CopyToReg) {
+    // Locate the first CopyToReg in the sequence of CopyToReg nodes.
+    while (Chain.getOperand(0).getOpcode() == ISD::CopyToReg) {
+      Chain = Chain.getOperand(0);
+    }
   }

   if (!Chain.getNumOperands())
@@ -1188,6 +1206,19 @@
       SDValue Load = N->getOperand(1);
       if (!isCalleeLoad(Load, Chain, HasCallSeq))
         continue;
+      if (N->getOpcode() == X86ISD::TC_RETURN) {
+        // The TC_RETURN instructions execute after the epilogue, so they
+        // can never use callee-saved registers. Ensure that there are
+        // registers available for the load address and the index.
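+        // (Note: the limit of six below mirrors the X86tcret_6regs
+        // predicate used by the existing non-indexed load-folding patterns.)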
+        unsigned NumRegs = 0;
+        for (unsigned i = 3, e = N->getNumOperands(); i != e; ++i)
+          if (isa<RegisterSDNode>(N->getOperand(i)))
+            ++NumRegs;
+        if (NumRegs > 6)
+          continue;
+      }
       moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
       ++NumLoadMoved;
       MadeChange = true;
diff --git a/llvm/test/CodeGen/X86/cfguard-checks.ll b/llvm/test/CodeGen/X86/cfguard-checks.ll
--- a/llvm/test/CodeGen/X86/cfguard-checks.ll
+++ b/llvm/test/CodeGen/X86/cfguard-checks.ll
@@ -203,8 +203,7 @@
   ; X64-LABEL: vmptr_thunk:
   ; X64:            movq (%rcx), %rax
   ; X64-NEXT:       movq 8(%rax), %rax
-  ; X64-NEXT:       movq __guard_dispatch_icall_fptr(%rip), %rdx
-  ; X64-NEXT:       rex64 jmpq *%rdx # TAILCALL
+  ; X64-NEXT:       rex64 jmpq *__guard_dispatch_icall_fptr(%rip) # TAILCALL
   ; X64-NOT:   callq
 }
diff --git a/llvm/test/CodeGen/X86/musttail-varargs.ll b/llvm/test/CodeGen/X86/musttail-varargs.ll
--- a/llvm/test/CodeGen/X86/musttail-varargs.ll
+++ b/llvm/test/CodeGen/X86/musttail-varargs.ll
@@ -356,8 +356,7 @@
 ; WINDOWS-NEXT:    cmpb $1, (%rcx)
 ; WINDOWS-NEXT:    jne .LBB2_2
 ; WINDOWS-NEXT:  # %bb.1: # %then
-; WINDOWS-NEXT:    movq 8(%rcx), %rax
-; WINDOWS-NEXT:    rex64 jmpq *%rax # TAILCALL
+; WINDOWS-NEXT:    rex64 jmpq *8(%rcx) # TAILCALL
 ; WINDOWS-NEXT:  .LBB2_2: # %else
 ; WINDOWS-NEXT:    movq 16(%rcx), %rax
 ; WINDOWS-NEXT:    movl $42, {{.*}}(%rip)
diff --git a/llvm/test/CodeGen/X86/sibcall-4.ll b/llvm/test/CodeGen/X86/sibcall-4.ll
--- a/llvm/test/CodeGen/X86/sibcall-4.ll
+++ b/llvm/test/CodeGen/X86/sibcall-4.ll
@@ -4,7 +4,7 @@
 define ghccc void @t(i32* %Base_Arg, i32* %Sp_Arg, i32* %Hp_Arg, i32 %R1_Arg) nounwind {
 cm1:
 ; CHECK-LABEL: t:
-; CHECK: jmpl *%eax
+; CHECK: jmpl *(%eax)
   %nm3 = getelementptr i32, i32* %Sp_Arg, i32 1
   %nm9 = load i32, i32* %Sp_Arg
   %nma = inttoptr i32 %nm9 to void (i32*, i32*, i32*, i32)*
diff --git a/llvm/test/CodeGen/X86/tailcall-64.ll b/llvm/test/CodeGen/X86/tailcall-64.ll
--- a/llvm/test/CodeGen/X86/tailcall-64.ll
+++ b/llvm/test/CodeGen/X86/tailcall-64.ll
@@ -227,6 +227,20 @@
   ret void
 }

+; Check that we can fold an indexed load into a tail call instruction that has multiple arguments.
+define void @fold_indexed_load_3(i8* %mbstr, i32* %fw1, i32 %fw2, i64 %idxprom) nounwind uwtable ssp {
+; CHECK-LABEL: fold_indexed_load_3:
+; CHECK:       ## %bb.0: ## %entry
+; CHECK-NEXT:    leaq (%rcx,%rcx,4), %rax
+; CHECK-NEXT:    movq _func_table@{{.*}}(%rip), %rcx
+; CHECK-NEXT:    jmpq *(%rcx,%rax,8) ## TAILCALL
+entry:
+  %dsplen = getelementptr inbounds [0 x %struct.funcs], [0 x %struct.funcs]* @func_table, i64 0, i64 %idxprom, i32 0
+  %x1 = load i32 (i8*, i32*, i32)*, i32 (i8*, i32*, i32)** %dsplen, align 8
+  %call = tail call i32 %x1(i8* %mbstr, i32* %fw1, i32 %fw2) nounwind
+  ret void
+}
+
 @funcs = external constant [0 x i32 (i8*, ...)*]

 ; Fold an indexed load into the tail call instruction.