Add a hidden -x86-use-fsrm-for-memcpy option to the X86 memcpy lowering.

The option is initialized to false. When it is set and the subtarget
reports hasFSRM(), the lowering returns emitRepmovs(..., MVT::i8) before
the constant-size handling is reached. The byval argument copy now builds
its size operand with getIntPtrConstant() instead of an MVT::i32 constant;
test2 in the new test file covers the "Cannot emit physreg copy
instruction" crash named in its comment.

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3109,7 +3109,7 @@
 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                          SDValue Chain, ISD::ArgFlagsTy Flags,
                                          SelectionDAG &DAG, const SDLoc &dl) {
-  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
+  SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
 
   return DAG.getMemcpy(
       Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -24,6 +24,10 @@
 
 #define DEBUG_TYPE "x86-selectiondag-info"
 
+static cl::opt<bool>
+    UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
+                     cl::desc("Use fast short rep mov in memcpy lowering"));
+
 bool X86SelectionDAGInfo::isBaseRegConflictPossible(
     SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
   // We cannot use TRI->hasBasePointer() until *after* we select all basic
@@ -306,6 +310,10 @@
   const X86Subtarget &Subtarget =
       DAG.getMachineFunction().getSubtarget<X86Subtarget>();
 
+  // If enabled and available, use fast short rep mov.
+  if (UseFSRMForMemcpy && Subtarget.hasFSRM())
+    return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
+
   /// Handle constant sizes,
   if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
     return emitConstantSizeRepmov(
diff --git a/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll b/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll
@@ -0,0 +1,31 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=-fsrm < %s -o - | FileCheck %s --check-prefix=NOFSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=+fsrm < %s -o - | FileCheck %s --check-prefix=FSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=haswell < %s | FileCheck %s --check-prefix=NOFSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-client < %s | FileCheck %s --check-prefix=FSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-server < %s | FileCheck %s --check-prefix=FSRM
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+define void @test1(i8* %a, i8* %b, i64 %s) nounwind {
+; NOFSRM-LABEL: test1
+; NOFSRM: # %bb.0:
+; NOFSRM: jmp memcpy
+;
+; FSRM-LABEL: test1
+; FSRM: # %bb.0:
+; FSRM-NEXT: movq %rdx, %rcx
+; FSRM-NEXT: rep;movsb (%rsi), %es:(%rdi)
+; FSRM-NEXT: retq
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %s, i1 0)
+  ret void
+}
+
+; Check that we don't crash due to a memcpy size type mismatch error ("Cannot
+; emit physreg copy instruction") in X86InstrInfo::copyPhysReg.
+%struct = type { [4096 x i8] }
+declare void @foo(%struct* byval)
+define void @test2(%struct* %x) {
+  call void @foo(%struct* byval %x)
+  ret void
+}