[X86] Optionally lower memcpy to `rep movsb` when fast short rep mov is available.

- Adds the hidden flag -x86-use-fsrm-for-memcpy (default: false).
- When the flag is set and the subtarget reports FSRM, memcpy lowering returns
  emitRepmovs(..., MVT::i8) before the constant-size handling is reached.
- The count copied into the CX register is zero-extended or truncated to
  i64/i32 (chosen by Use64BitRegs) so its type matches the register width.
- Adds memcpy-inline-fsrm.ll, which expects `rep;movsb` with +fsrm or an
  icelake CPU and a tail call to memcpy otherwise.

diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
--- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -24,6 +24,10 @@
 
 #define DEBUG_TYPE "x86-selectiondag-info"
 
+static cl::opt<bool>
+    UseFSRMForMemcpy("x86-use-fsrm-for-memcpy", cl::Hidden, cl::init(false),
+                     cl::desc("Use fast short rep mov in memcpy lowering"));
+
 bool X86SelectionDAGInfo::isBaseRegConflictPossible(
     SelectionDAG &DAG, ArrayRef<MCPhysReg> ClobberSet) const {
   // We cannot use TRI->hasBasePointer() until *after* we select all basic
@@ -191,7 +195,9 @@
   const unsigned SI = Use64BitRegs ? X86::RSI : X86::ESI;
 
   SDValue InFlag;
-  Chain = DAG.getCopyToReg(Chain, dl, CX, Size, InFlag);
+  Chain = DAG.getCopyToReg(
+      Chain, dl, CX,
+      DAG.getZExtOrTrunc(Size, dl, Use64BitRegs ? MVT::i64 : MVT::i32), InFlag);
   InFlag = Chain.getValue(1);
   Chain = DAG.getCopyToReg(Chain, dl, DI, Dst, InFlag);
   InFlag = Chain.getValue(1);
@@ -306,6 +312,10 @@
   const X86Subtarget &Subtarget =
       DAG.getMachineFunction().getSubtarget<X86Subtarget>();
 
+  // If enabled and available, use fast short rep mov.
+  if (UseFSRMForMemcpy && Subtarget.hasFSRM())
+    return emitRepmovs(Subtarget, DAG, dl, Chain, Dst, Src, Size, MVT::i8);
+
   /// Handle constant sizes,
   if (ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size))
     return emitConstantSizeRepmov(
diff --git a/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll b/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/X86/memcpy-inline-fsrm.ll
@@ -0,0 +1,22 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=-fsrm < %s -o - | FileCheck %s --check-prefix=NOFSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mattr=+fsrm < %s -o - | FileCheck %s --check-prefix=FSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=haswell < %s | FileCheck %s --check-prefix=NOFSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-client < %s | FileCheck %s --check-prefix=FSRM
+; RUN: llc -mtriple=x86_64-linux-gnu -x86-use-fsrm-for-memcpy -mcpu=icelake-server < %s | FileCheck %s --check-prefix=FSRM
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1) nounwind
+
+define void @test1(i8* %a, i8* %b, i64 %s) nounwind {
+; NOFSRM-LABEL: test1
+; NOFSRM: # %bb.0:
+; NOFSRM: jmp memcpy
+;
+; FSRM-LABEL: test1
+; FSRM: # %bb.0:
+; FSRM-NEXT: movq %rdx, %rcx
+; FSRM-NEXT: rep;movsb (%rsi), %es:(%rdi)
+; FSRM-NEXT: retq
+  tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* %b, i64 %s, i1 0)
+  ret void
+}