Index: llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h +++ llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h @@ -523,6 +523,8 @@ FunctionLoweringInfo FuncInfo; + const TargetLibraryInfo *LibInfo; + // True when either the Target Machine specifies no optimizations or the // function has the optnone attribute. bool EnableOpts = false; Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -18,6 +18,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/GlobalISel/CallLowering.h" @@ -89,6 +90,7 @@ INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI", @@ -153,6 +155,7 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<StackProtector>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetPassConfig>(); AU.addRequired<GISelCSEAnalysisWrapperPass>(); getSelectionDAGFallbackAnalysisUsage(AU); @@ -1780,6 +1783,8 @@ assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic"); + if (ID == Intrinsic::memcpy && !LibInfo->has(LibFunc_memcpy)) + return false; if (translateKnownIntrinsic(CI, ID, MIRBuilder)) return true; @@ -2413,6 +2418,8 @@ SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo); SL->init(TLI, TM, *DL); + LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); + EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F); 
assert(PendingPHIs.empty() && "stale PHIs"); Index: llvm/lib/CodeGen/SelectionDAG/FastISel.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1509,6 +1509,10 @@ return selectXRayCustomEvent(II); case Intrinsic::xray_typedevent: return selectXRayTypedEvent(II); + case Intrinsic::memcpy: + if (!LibInfo->has(LibFunc_memcpy)) + return false; + break; } return fastLowerIntrinsicCall(II); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -27,6 +27,7 @@ #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -6434,7 +6435,7 @@ // If we really need inline code and the target declined to provide it, // use a (potentially long) sequence of loads and stores. 
- if (AlwaysInline) { + if (AlwaysInline || (!LibInfo->has(LibFunc_memcpy) && ConstantSize)) { assert(ConstantSize && "AlwaysInline requires a constant size!"); return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), Alignment, Index: llvm/test/CodeGen/AArch64/intrinsic-memcpy-nobuiltin.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/intrinsic-memcpy-nobuiltin.ll @@ -0,0 +1,19 @@ +; RUN: llc -mtriple=aarch64-linux-gnu < %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -O0 -global-isel < %s -o - | FileCheck %s +; RUN: llc -mtriple=aarch64-linux-gnu -O0 -fast-isel < %s -o - | FileCheck %s + +%struct.s = type { [512 x i8] } + +define void @copy(%struct.s* readonly %src, %struct.s* %dst) #0 { +entry: +; CHECK-LABEL: copy +; CHECK-NOT: memcpy + %0 = getelementptr %struct.s, %struct.s* %dst, i64 0, i32 0, i64 0 + %1 = getelementptr %struct.s, %struct.s* %src, i64 0, i32 0, i64 0 + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 dereferenceable(512) %0, i8* nonnull align 1 dereferenceable(512) %1, i64 512, i1 false) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) + +attributes #0 = { "no-builtins" }