Index: include/llvm/CodeGen/SelectionDAG.h =================================================================== --- include/llvm/CodeGen/SelectionDAG.h +++ include/llvm/CodeGen/SelectionDAG.h @@ -700,16 +700,16 @@ SDValue getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo); SDValue getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVol, + SDValue Size, unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo); SDValue getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, bool isVol, + SDValue Size, unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo); /// getSetCC - Helper function to make it easier to build SetCC's if you just Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4291,7 +4291,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, - MachinePointerInfo DstPtrInfo, + bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4349,15 +4349,16 @@ Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4404,15 +4405,16 @@ Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); - std::pair CallResult = TLI->LowerCallTo(CLI); + .setDiscardResult() + .setTailCall(isTailCall); + std::pair CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, - unsigned Align, bool isVol, + unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -4461,7 +4463,8 @@ Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), TLI->getPointerTy()), std::move(Args), 0) - .setDiscardResult(); + .setDiscardResult() + .setTailCall(isTailCall); std::pair CallResult = TLI->LowerCallTo(CLI); return CallResult.second; Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4590,9 +4590,15 @@ if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + false, isTC, + MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + if (MC.getNode() != nullptr) + DAG.setRoot(MC); + else + HasTailCall = true; return nullptr; } case Intrinsic::memset: { @@ -4609,8 +4615,13 @@ if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0))); + if (MS.getNode() != nullptr) + DAG.setRoot(MS); + else + HasTailCall = true; return nullptr; } case Intrinsic::memmove: { @@ -4629,9 +4640,14 @@ if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, - MachinePointerInfo(I.getArgOperand(0)), - MachinePointerInfo(I.getArgOperand(1)))); + bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget()); + SDValue MM = DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, + isTC, MachinePointerInfo(I.getArgOperand(0)), + MachinePointerInfo(I.getArgOperand(1))); + if (MM.getNode() != nullptr) + DAG.setRoot(MM); + else + HasTailCall = true; return nullptr; } case Intrinsic::dbg_declare: { Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2702,8 +2702,9 @@ DAG.getConstant(Outs[i].Flags.getByValSize(), MVT::i64); SDValue Cpy = DAG.getMemcpy( Chain, DL, DstAddr, Arg, SizeNode, Outs[i].Flags.getByValAlign(), - /*isVol = */ false, - /*AlwaysInline = */ false, DstInfo, MachinePointerInfo()); + /*isVol = */ false, /*AlwaysInline = */ false, + /*isTailCall = */ false, + DstInfo, MachinePointerInfo()); MemOpChains.push_back(Cpy); } else { @@ -3921,7 +3922,7 @@ return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1), Op.getOperand(2), DAG.getConstant(VaListSize, MVT::i32), - 8, false, false, MachinePointerInfo(DestSV), + 8, false, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); } Index: lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.cpp +++ lib/Target/Hexagon/HexagonISelLowering.cpp @@ -316,6 +316,7 @@ SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, /*AlwaysInline=*/false, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); } Index: lib/Target/MSP430/MSP430ISelLowering.cpp =================================================================== --- lib/Target/MSP430/MSP430ISelLowering.cpp +++ lib/Target/MSP430/MSP430ISelLowering.cpp @@ -645,6 +645,7 @@ Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); } else { Index: lib/Target/Mips/MipsISelLowering.cpp =================================================================== --- lib/Target/Mips/MipsISelLowering.cpp +++ lib/Target/Mips/MipsISelLowering.cpp @@ -3682,6 +3682,7 @@ DAG.getIntPtrConstant(VA.getLocMemOffset())); Chain = DAG.getMemcpy(Chain, DL, Dst, Src, DAG.getConstant(MemCpySize, PtrTy), Alignment, /*isVolatile=*/false, /*AlwaysInline=*/false, + /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); MemOpChains.push_back(Chain); } Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -2234,7 +2234,7 @@ // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2), - DAG.getConstant(12, MVT::i32), 8, false, true, + DAG.getConstant(12, MVT::i32), 8, false, true, false, MachinePointerInfo(), MachinePointerInfo()); } @@ -3821,7 +3821,7 @@ SDLoc dl) { SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), - false, false, MachinePointerInfo(), + false, false, false, MachinePointerInfo(), MachinePointerInfo()); } Index: lib/Target/Sparc/SparcISelLowering.cpp =================================================================== --- lib/Target/Sparc/SparcISelLowering.cpp +++ lib/Target/Sparc/SparcISelLowering.cpp @@ -727,7 +727,8 @@ Chain = DAG.getMemcpy(Chain, dl, FIPtr, Arg, SizeNode, Align, false, // isVolatile, - (Size <= 32), // AlwaysInline if size <= 32 + (Size <= 32), // AlwaysInline if size <= 32, + false, // isTailCall MachinePointerInfo(), MachinePointerInfo()); ByValArgs.push_back(FIPtr); } Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2222,6 +2222,7 @@ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(32), /*Align*/8, /*isVolatile*/false, /*AlwaysInline*/false, + /*isTailCall*/false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -2143,6 +2143,7 @@ return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile*/false, /*AlwaysInline=*/true, + /*isTailCall*/false, MachinePointerInfo(), MachinePointerInfo()); } @@ -14450,7 +14451,7 @@ return DAG.getMemcpy(Chain, DL, DstPtr, SrcPtr, DAG.getIntPtrConstant(24), 8, /*isVolatile*/false, - false, + false, false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); } Index: lib/Target/X86/X86SelectionDAGInfo.cpp =================================================================== --- lib/Target/X86/X86SelectionDAGInfo.cpp +++ lib/Target/X86/X86SelectionDAGInfo.cpp @@ -193,7 +193,8 @@ DAG.getConstant(Offset, AddrVT)), Src, DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, DstPtrInfo.getWithOffset(Offset)); + Align, isVolatile, false, + DstPtrInfo.getWithOffset(Offset)); } // TODO: Use a Tokenfactor, as in memcpy, instead of a single chain. @@ -282,7 +283,7 @@ DAG.getNode(ISD::ADD, dl, SrcVT, Src, DAG.getConstant(Offset, SrcVT)), DAG.getConstant(BytesLeft, SizeVT), - Align, isVolatile, AlwaysInline, + Align, isVolatile, AlwaysInline, false, DstPtrInfo.getWithOffset(Offset), SrcPtrInfo.getWithOffset(Offset))); } Index: lib/Target/XCore/XCoreISelLowering.cpp =================================================================== --- lib/Target/XCore/XCoreISelLowering.cpp +++ lib/Target/XCore/XCoreISelLowering.cpp @@ -1423,7 +1423,7 @@ InVals.push_back(FIN); MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV, DAG.getConstant(Size, MVT::i32), - Align, false, false, + Align, false, false, false, MachinePointerInfo(), MachinePointerInfo())); } else { @@ -1834,10 +1834,11 @@ LD->getAlignment() == Alignment && !LD->isVolatile() && !LD->isIndexed() && Chain.reachesChainWithoutSideEffects(SDValue(LD, 1))) { + bool isTail = isInTailCallPosition(DAG, ST, Chain); return DAG.getMemmove(Chain, dl, ST->getBasePtr(), LD->getBasePtr(), DAG.getConstant(StoreBits/8, MVT::i32), - Alignment, false, ST->getPointerInfo(), + Alignment, false, isTail, ST->getPointerInfo(), LD->getPointerInfo()); } } Index: test/CodeGen/AArch64/tailcall-mem-intrinsics.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/tailcall-mem-intrinsics.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=aarch64 < %s | FileCheck %s + +; CHECK-LABEL: tail_memcpy: +; CHECK: b memcpy +define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: tail_memmove: +; CHECK: b memmove +define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { +entry: + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: tail_memset: +; CHECK: b memset +define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 { +entry: + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 + +attributes #0 = { nounwind } Index: test/CodeGen/ARM/tail-call-mem-intrinsics.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/tail-call-mem-intrinsics.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=arm < %s | FileCheck %s + +; CHECK-LABEL: tail_memcpy: +; CHECK: b memcpy +define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: tail_memmove: +; CHECK: b memmove +define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { +entry: + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: tail_memset: +; CHECK: b memset +define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 { +entry: + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 + +attributes #0 = { nounwind } Index: test/CodeGen/Hexagon/tail-call-mem-intrinsics.ll =================================================================== --- /dev/null +++ test/CodeGen/Hexagon/tail-call-mem-intrinsics.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=hexagon < %s | FileCheck %s + +; CHECK-LABEL: tail_memcpy: +; CHECK: jump memcpy +define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: tail_memmove: +; CHECK: jump memmove +define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { +entry: + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: tail_memset: +; CHECK: jump memset +define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 { +entry: + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 + +attributes #0 = { nounwind } Index: test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/tail-call-mem-intrinsics.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=systemz < %s | FileCheck %s + +; CHECK-LABEL: tail_memcpy: +; CHECK: jg memcpy +define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: tail_memmove: +; CHECK: jg memmove +define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { +entry: + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: tail_memset: +; CHECK: jg memset +define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 { +entry: + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 + +attributes #0 = { nounwind } Index: test/CodeGen/X86/tailcall-mem-intrinsics.ll =================================================================== --- /dev/null +++ test/CodeGen/X86/tailcall-mem-intrinsics.ll @@ -0,0 +1,31 @@ +; RUN: llc -march=x86 < %s | FileCheck %s + +; CHECK-LABEL: tail_memcpy +; CHECK: jmp memcpy +define void @tail_memcpy(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: tail_memset +; CHECK; jmp memmove +define void @tail_memmove(i8* nocapture %p, i8* nocapture readonly %q, i32 %n) #0 { +entry: + tail call void @llvm.memmove.p0i8.p0i8.i32(i8* %p, i8* %q, i32 %n, i32 1, i1 false) + ret void +} + +; CHECK-LABEL: tail_memset +; CHECK: jmp memset +define void @tail_memset(i8* nocapture %p, i8 %c, i32 %n) #0 { +entry: + tail call void @llvm.memset.p0i8.i32(i8* %p, i8 %c, i32 %n, i32 1, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) #0 +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) #0 + +attributes #0 = { nounwind }