Index: llvm/include/llvm/Support/TargetOpcodes.def =================================================================== --- llvm/include/llvm/Support/TargetOpcodes.def +++ llvm/include/llvm/Support/TargetOpcodes.def @@ -700,6 +700,9 @@ /// llvm.memcpy intrinsic HANDLE_TARGET_OPCODE(G_MEMCPY) +/// llvm.memcpy.inline intrinsic +HANDLE_TARGET_OPCODE(G_MEMCPY_INLINE) + /// llvm.memmove intrinsic HANDLE_TARGET_OPCODE(G_MEMMOVE) Index: llvm/include/llvm/Target/GenericOpcodes.td =================================================================== --- llvm/include/llvm/Target/GenericOpcodes.td +++ llvm/include/llvm/Target/GenericOpcodes.td @@ -1285,6 +1285,14 @@ let mayStore = 1; } +def G_MEMCPY_INLINE : GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall); + let hasSideEffects = 0; + let mayLoad = 1; + let mayStore = 1; +} + def G_MEMMOVE : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall); Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1731,6 +1731,8 @@ return true; case Intrinsic::memcpy: return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY); + case Intrinsic::memcpy_inline: + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE); case Intrinsic::memmove: return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMMOVE); case Intrinsic::memset: Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -744,6 +744,8 @@ MI.eraseFromParent(); return Result; } + case TargetOpcode::G_MEMCPY_INLINE: + llvm_unreachable("emitting a libcall is illegal for memcpy_inline"); } MI.eraseFromParent(); Index: llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll +++ llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-memory-intrinsics.ll @@ -109,6 +109,23 @@ ret void } +define void @test_memcpy_inline_p1_p3_i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { + ; CHECK-LABEL: name: test_memcpy_inline_p1_p3_i32 + ; CHECK: bb.1 (%ir-block.0): + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(p3) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 256 + ; CHECK: G_MEMCPY_INLINE [[MV]](p1), [[COPY2]](p3), [[C]](s32), 0 :: (store 1 into %ir.dst, addrspace 1), (load 1 from %ir.src, addrspace 3) + ; CHECK: [[COPY4:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY3]] + ; CHECK: S_SETPC_B64_return [[COPY4]] + call void @llvm.memcpy.inline.p1i8.p3i8.i32(i8 addrspace(1)* %dst, i8 addrspace(3)* %src, i32 256, i1 false) + ret void +} + define void @test_memmove_p1_p3_i64(i8 addrspace(1)* %dst, i8 addrspace(3)* %src) { ; CHECK-LABEL: name: test_memmove_p1_p3_i64 ; CHECK: bb.1 (%ir-block.0): @@ -274,6 +291,9 @@ declare void @llvm.memcpy.p3i8.p1i8.i64(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i64, i1 immarg) #0 declare void @llvm.memcpy.p3i8.p1i8.i32(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i32, i1 immarg) #0 declare void @llvm.memcpy.p3i8.p1i8.i16(i8 addrspace(3)* noalias nocapture writeonly, i8 addrspace(1)* noalias nocapture readonly, i16, i1 immarg) #0 + +declare void @llvm.memcpy.inline.p1i8.p3i8.i32(i8 addrspace(1)* noalias nocapture writeonly, i8 addrspace(3)* noalias nocapture readonly, i32, i1 immarg) #0 + declare void @llvm.memmove.p1i8.p3i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i64, i1 immarg) #0 declare void @llvm.memmove.p1i8.p3i8.i32(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i32, i1 immarg) #0 declare void @llvm.memmove.p1i8.p3i8.i16(i8 addrspace(1)* nocapture, i8 addrspace(3)* nocapture readonly, i16, i1 immarg) #0