diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h @@ -532,16 +532,23 @@ /// combine functions. Returns true if changed. bool tryCombine(MachineInstr &MI); + /// Emit loads and stores that perform the given memcpy. + /// Assumes \p MI is a G_MEMCPY_INLINE + /// TODO: implement dynamically sized inline memcpy, and rename: s/bool tryEmit/void emit/ + bool tryEmitMemcpyInline(MachineInstr &MI); private: // Memcpy family optimization helpers. + bool tryEmitMemcpyInline(MachineInstr &MI, Register Dst, Register Src, + uint64_t KnownLen, Align DstAlign, Align SrcAlign, + bool IsVolatile); bool optimizeMemcpy(MachineInstr &MI, Register Dst, Register Src, - unsigned KnownLen, Align DstAlign, Align SrcAlign, + uint64_t KnownLen, uint64_t Limit, Align DstAlign, Align SrcAlign, bool IsVolatile); bool optimizeMemmove(MachineInstr &MI, Register Dst, Register Src, - unsigned KnownLen, Align DstAlign, Align SrcAlign, + uint64_t KnownLen, Align DstAlign, Align SrcAlign, bool IsVolatile); bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val, - unsigned KnownLen, Align DstAlign, bool IsVolatile); + uint64_t KnownLen, Align DstAlign, bool IsVolatile); /// Given a non-indexed load or store instruction \p MI, find an offset that /// can be usefully and legally folded into it as a post-indexing operation. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1844,6 +1844,15 @@ DstMMO, SrcMMO); } + MachineInstrBuilder buildMemCpyInline(const SrcOp &DstPtr, const SrcOp &SrcPtr, + const SrcOp &Size, MachineMemOperand &DstMMO, + MachineMemOperand &SrcMMO) { + auto MIB = buildInstr(TargetOpcode::G_MEMCPY_INLINE, {}, {DstPtr, SrcPtr, Size}); + MIB.addMemOperand(&DstMMO); + MIB.addMemOperand(&SrcMMO); + return MIB; + } + /// Build and insert \p Dst = G_SBFX \p Src, \p LSB, \p Width. MachineInstrBuilder buildSbfx(const DstOp &Dst, const SrcOp &Src, const SrcOp &LSB, const SrcOp &Width) { diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -739,6 +739,9 @@ /// llvm.memcpy intrinsic HANDLE_TARGET_OPCODE(G_MEMCPY) +/// llvm.memcpy.inline intrinsic +HANDLE_TARGET_OPCODE(G_MEMCPY_INLINE) + /// llvm.memmove intrinsic HANDLE_TARGET_OPCODE(G_MEMMOVE) diff --git a/llvm/include/llvm/Target/GenericOpcodes.td b/llvm/include/llvm/Target/GenericOpcodes.td --- a/llvm/include/llvm/Target/GenericOpcodes.td +++ b/llvm/include/llvm/Target/GenericOpcodes.td @@ -1353,6 +1353,14 @@ let mayStore = true; } +def G_MEMCPY_INLINE : GenericInstruction { + let OutOperandList = (outs); + let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size); + let hasSideEffects = false; + let mayLoad = true; + let mayStore = true; +} + def G_MEMMOVE : GenericInstruction { let OutOperandList = (outs); let InOperandList = (ins ptype0:$dst_addr, ptype1:$src_addr, type2:$size, untyped_imm_0:$tailcall); diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -1218,7 +1218,7 @@ } bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst, - Register Val, unsigned KnownLen, + Register Val, uint64_t KnownLen, Align Alignment, bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); const auto &TLI = *MF.getSubtarget().getTargetLowering(); @@ -1330,8 +1330,46 @@ return true; } +bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); + + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + Register Len = MI.getOperand(2).getReg(); + + auto MMOIt = MI.memoperands_begin(); + const MachineMemOperand *MemOp = *MMOIt; + bool IsVolatile = MemOp->isVolatile(); + + // See if this is a constant length copy + auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); + if (!LenVRegAndVal) + return false; // FIXME: support dynamically sized G_MEMCPY_INLINE + + uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); + if (KnownLen == 0) { + MI.eraseFromParent(); + return true; + } + + const auto &DstMMO = **MI.memoperands_begin(); + const auto &SrcMMO = **std::next(MI.memoperands_begin()); + Align DstAlign = DstMMO.getBaseAlign(); + Align SrcAlign = SrcMMO.getBaseAlign(); + + return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); +} + +bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst, + Register Src, uint64_t KnownLen, + Align DstAlign, Align SrcAlign, + bool IsVolatile) { + assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE); + return optimizeMemcpy(MI, Dst, Src, KnownLen, std::numeric_limits::max(), DstAlign, SrcAlign, IsVolatile); +} + bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst, - Register Src, unsigned KnownLen, + Register Src, uint64_t KnownLen, uint64_t Limit, Align DstAlign, Align SrcAlign, bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); @@ -1343,7 +1381,6 @@ bool DstAlignCanChange = false; MachineFrameInfo &MFI = MF.getFrameInfo(); - bool OptSize = shouldLowerMemFuncForSize(MF); Align Alignment = commonAlignment(DstAlign, SrcAlign); MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI); @@ -1354,7 +1391,6 @@ // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining // if the memcpy is in a tail call position. - unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize); std::vector MemOps; const auto &DstMMO = **MI.memoperands_begin(); @@ -1437,7 +1473,7 @@ } bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst, - Register Src, unsigned KnownLen, + Register Src, uint64_t KnownLen, Align DstAlign, Align SrcAlign, bool IsVolatile) { auto &MF = *MI.getParent()->getParent(); @@ -1550,10 +1586,6 @@ auto MMOIt = MI.memoperands_begin(); const MachineMemOperand *MemOp = *MMOIt; - bool IsVolatile = MemOp->isVolatile(); - // Don't try to optimize volatile. - if (IsVolatile) - return false; Align DstAlign = MemOp->getBaseAlign(); Align SrcAlign; @@ -1571,18 +1603,31 @@ auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI); if (!LenVRegAndVal) return false; // Leave it to the legalizer to lower it to a libcall. - unsigned KnownLen = LenVRegAndVal->Value.getZExtValue(); + uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue(); if (KnownLen == 0) { MI.eraseFromParent(); return true; } + bool IsVolatile = MemOp->isVolatile(); + if (Opc == TargetOpcode::G_MEMCPY_INLINE) + return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + + // Don't try to optimize volatile. + if (IsVolatile) + return false; + if (MaxLen && KnownLen > MaxLen) return false; - if (Opc == TargetOpcode::G_MEMCPY) - return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); + if (Opc == TargetOpcode::G_MEMCPY) { + auto &MF = *MI.getParent()->getParent(); + const auto &TLI = *MF.getSubtarget().getTargetLowering(); + bool OptSize = shouldLowerMemFuncForSize(MF); + uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize); + return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign, IsVolatile); + } if (Opc == TargetOpcode::G_MEMMOVE) return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile); if (Opc == TargetOpcode::G_MEMSET) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -1589,6 +1589,9 @@ if (auto *MCI = dyn_cast(&CI)) { DstAlign = MCI->getDestAlign().valueOrOne(); SrcAlign = MCI->getSourceAlign().valueOrOne(); + } else if (auto *MCI = dyn_cast(&CI)) { + DstAlign = MCI->getDestAlign().valueOrOne(); + SrcAlign = MCI->getSourceAlign().valueOrOne(); } else if (auto *MMI = dyn_cast(&CI)) { DstAlign = MMI->getDestAlign().valueOrOne(); SrcAlign = MMI->getSourceAlign().valueOrOne(); @@ -1597,10 +1600,12 @@ DstAlign = MSI->getDestAlign().valueOrOne(); } - // We need to propagate the tail call flag from the IR inst as an argument. - // Otherwise, we have to pessimize and assume later that we cannot tail call - // any memory intrinsics. - ICall.addImm(CI.isTailCall() ? 1 : 0); + if (Opcode != TargetOpcode::G_MEMCPY_INLINE) { + // We need to propagate the tail call flag from the IR inst as an argument. + // Otherwise, we have to pessimize and assume later that we cannot tail call + // any memory intrinsics. + ICall.addImm(CI.isTailCall() ? 1 : 0); + } // Create mem operands to store the alignment and volatile info. auto VolFlag = IsVol ? MachineMemOperand::MOVolatile : MachineMemOperand::MONone; @@ -2031,6 +2036,8 @@ getOrCreateVReg(*CI.getArgOperand(0)), MachineInstr::copyFlagsFromInstruction(CI)); return true; + case Intrinsic::memcpy_inline: + return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE); case Intrinsic::memcpy: return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY); case Intrinsic::memmove: diff --git a/llvm/lib/CodeGen/MachineVerifier.cpp b/llvm/lib/CodeGen/MachineVerifier.cpp --- a/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/llvm/lib/CodeGen/MachineVerifier.cpp @@ -1477,6 +1477,7 @@ } break; } + case TargetOpcode::G_MEMCPY_INLINE: case TargetOpcode::G_MEMCPY: case TargetOpcode::G_MEMMOVE: { ArrayRef MMOs = MI->memoperands(); @@ -1507,6 +1508,10 @@ if (SrcPtrTy.getAddressSpace() != MMOs[1]->getAddrSpace()) report("inconsistent load address space", MI); + if (Opc != TargetOpcode::G_MEMCPY_INLINE) + if (!MI->getOperand(3).isImm() || (MI->getOperand(3).getImm() & ~1LL)) + report("'tail' flag (operand 3) must be an immediate 0 or 1", MI); + break; } case TargetOpcode::G_BZERO: @@ -1532,6 +1537,11 @@ if (DstPtrTy.getAddressSpace() != MMOs[0]->getAddrSpace()) report("inconsistent " + Twine(Name, " address space"), MI); + if (!MI->getOperand(MI->getNumOperands() - 1).isImm() || + (MI->getOperand(MI->getNumOperands() - 1).getImm() & ~1LL)) + report("'tail' flag (last operand) must be an immediate 0 or 1", + MI); + break; } case TargetOpcode::G_VECREDUCE_SEQ_FADD: diff --git a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64O0PreLegalizerCombiner.cpp @@ -85,6 +85,8 @@ return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); + case TargetOpcode::G_MEMCPY_INLINE: + return Helper.tryEmitMemcpyInline(MI); case TargetOpcode::G_MEMCPY: case TargetOpcode::G_MEMMOVE: case TargetOpcode::G_MEMSET: { diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -272,11 +272,13 @@ return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: return Helper.tryCombineShuffleVector(MI); + case TargetOpcode::G_MEMCPY_INLINE: + return Helper.tryEmitMemcpyInline(MI); case TargetOpcode::G_MEMCPY: case TargetOpcode::G_MEMMOVE: case TargetOpcode::G_MEMSET: { // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other - // heuristics decide. + // heuristics decide (unless it is an llvm.memcpy.inline, of course). unsigned MaxLen = EnableOpt ? 0 : 32; // Try to inline memcpy type calls if optimizations are enabled. if (Helper.tryCombineMemCpyFamily(MI, MaxLen)) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp --- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp @@ -205,6 +205,8 @@ return true; switch (MI.getOpcode()) { + case TargetOpcode::G_MEMCPY_INLINE: + return Helper.tryEmitMemcpyInline(MI); case TargetOpcode::G_CONCAT_VECTORS: return Helper.tryCombineConcatVectors(MI); case TargetOpcode::G_SHUFFLE_VECTOR: diff --git a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp --- a/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp +++ b/llvm/lib/Target/Mips/MipsPreLegalizerCombiner.cpp @@ -42,6 +42,8 @@ switch (MI.getOpcode()) { default: return false; + case TargetOpcode::G_MEMCPY_INLINE: + return Helper.tryEmitMemcpyInline(MI); case TargetOpcode::G_LOAD: case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: { diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-opt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-opt.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy-opt.mir @@ -0,0 +1,269 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s +--- | + target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + target triple = "arm64-apple-darwin" + + define void @test_memcpy1(i32* nocapture %dst, i32* nocapture readonly %src, i64 %len) local_unnamed_addr #0 { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 %len, i1 false) + ret void + } + + declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1 + + define void @test_memcpy2_const(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false) + ret void + } + + define void @test_memcpy2_const_optsize(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #2 { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false) + ret void + } + + define void @test_memcpy2_const_minsize(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #3 { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false) + ret void + } + + define void @test_memcpy3_const_arrays_unaligned(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 143, i1 false) + ret void + } + + attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2,+zcm,+zcz" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { argmemonly nounwind } + attributes #2 = { optsize } + attributes #3 = { minsize } + +... +--- +name: test_memcpy1 +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1, $x2 + + ; CHECK-LABEL: name: test_memcpy1 + ; CHECK: liveins: $x0, $x1, $x2 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = COPY $x2 + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + RET_ReallyLR + +... +--- +name: test_memcpy2_const +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_memcpy2_const + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4) + ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 72 + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + RET_ReallyLR + +... +--- +name: test_memcpy2_const_optsize +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_memcpy2_const_optsize + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4) + ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 72 + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + RET_ReallyLR + +... +--- +name: test_memcpy2_const_minsize +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_memcpy2_const_minsize + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72 + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 72 + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + RET_ReallyLR + +... +--- +name: test_memcpy3_const_arrays_unaligned +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $x0, $x1 + + ; CHECK-LABEL: name: test_memcpy3_const_arrays_unaligned + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[GEP6]](p0) :: (load 16 from %ir.1 + 64, align 4) + ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s128), [[GEP7]](p0) :: (store 16 into %ir.0 + 64, align 4) + ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 + ; CHECK: [[GEP8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[GEP8]](p0) :: (load 16 from %ir.1 + 80, align 4) + ; CHECK: [[GEP9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK: G_STORE [[LOAD5]](s128), [[GEP9]](p0) :: (store 16 into %ir.0 + 80, align 4) + ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 + ; CHECK: [[GEP10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[GEP10]](p0) :: (load 16 from %ir.1 + 96, align 4) + ; CHECK: [[GEP11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK: G_STORE [[LOAD6]](s128), [[GEP11]](p0) :: (store 16 into %ir.0 + 96, align 4) + ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 + ; CHECK: [[GEP12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[GEP12]](p0) :: (load 16 from %ir.1 + 112, align 4) + ; CHECK: [[GEP13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; CHECK: G_STORE [[LOAD7]](s128), [[GEP13]](p0) :: (store 16 into %ir.0 + 112, align 4) + ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 127 + ; CHECK: [[GEP14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[GEP14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4) + ; CHECK: [[GEP15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; CHECK: G_STORE [[LOAD8]](s128), [[GEP15]](p0) :: (store 16 into %ir.0 + 127, align 1, basealign 4) + ; CHECK: RET_ReallyLR + %0:_(p0) = COPY $x0 + %1:_(p0) = COPY $x1 + %2:_(s64) = G_CONSTANT i64 143 + G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir @@ -4,21 +4,21 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" target triple = "arm64-apple-darwin" - define void @test_memcpy1(i32* nocapture %dst, i32* nocapture readonly %src, i64 %len) local_unnamed_addr #0 { + define void @test_memcpy1(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 { entry: %0 = bitcast i32* %dst to i8* %1 = bitcast i32* %src to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 %len, i1 false) + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 13, i1 false) ret void } - declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1 + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1 define void @test_memcpy2_const(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 { entry: %0 = bitcast i32* %dst to i8* %1 = bitcast i32* %src to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false) + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false) ret void } @@ -26,7 +26,7 @@ entry: %0 = bitcast i32* %dst to i8* %1 = bitcast i32* %src to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false) + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false) ret void } @@ -34,7 +34,7 @@ entry: %0 = bitcast i32* %dst to i8* %1 = bitcast i32* %src to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false) + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 72, i1 false) ret void } @@ -42,7 +42,7 @@ entry: %0 = bitcast i32* %dst to i8* %1 = bitcast i32* %src to i8* - tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 143, i1 false) + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 143, i1 false) ret void } @@ -65,17 +65,19 @@ bb.1.entry: liveins: $x0, $x1, $x2 + ; FIXME: This test should be adjusted when we get support for dynamically-sized G_MEMCPY_INLINE + ; ; CHECK-LABEL: name: test_memcpy1 ; CHECK: liveins: $x0, $x1, $x2 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2 - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[COPY2]](s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = COPY $x2 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... @@ -99,30 +101,30 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) - ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) - ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4) - ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4) + ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load 8 from %ir.1 + 64, align 4) + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p0) :: (store 8 into %ir.0 + 64, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 72 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... @@ -146,30 +148,30 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) - ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) - ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[GEP6]](p0) :: (load 8 from %ir.1 + 64, align 4) - ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD4]](s64), [[GEP7]](p0) :: (store 8 into %ir.0 + 64, align 4) + ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load 8 from %ir.1 + 64, align 4) + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p0) :: (store 8 into %ir.0 + 64, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 72 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... @@ -190,13 +192,33 @@ ; CHECK: liveins: $x0, $x1 ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 - ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 72 - ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[C]](s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 + ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p0) :: (load 8 from %ir.1 + 64, align 4) + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p0) :: (store 8 into %ir.0 + 64, align 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 72 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... @@ -220,50 +242,50 @@ ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load 16 from %ir.1, align 4) ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p0) :: (store 16 into %ir.0, align 4) ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 - ; CHECK: [[GEP:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) - ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[GEP]](p0) :: (load 16 from %ir.1 + 16, align 4) - ; CHECK: [[GEP1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) - ; CHECK: G_STORE [[LOAD1]](s128), [[GEP1]](p0) :: (store 16 into %ir.0 + 16, align 4) + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p0) :: (load 16 from %ir.1 + 16, align 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p0) :: (store 16 into %ir.0 + 16, align 4) ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 - ; CHECK: [[GEP2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) - ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[GEP2]](p0) :: (load 16 from %ir.1 + 32, align 4) - ; CHECK: [[GEP3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) - ; CHECK: G_STORE [[LOAD2]](s128), [[GEP3]](p0) :: (store 16 into %ir.0 + 32, align 4) + ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C1]](s64) + ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p0) :: (load 16 from %ir.1 + 32, align 4) + ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C1]](s64) + ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p0) :: (store 16 into %ir.0 + 32, align 4) ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 - ; CHECK: [[GEP4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) - ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[GEP4]](p0) :: (load 16 from %ir.1 + 48, align 4) - ; CHECK: [[GEP5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) - ; CHECK: G_STORE [[LOAD3]](s128), [[GEP5]](p0) :: (store 16 into %ir.0 + 48, align 4) + ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C2]](s64) + ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p0) :: (load 16 from %ir.1 + 48, align 4) + ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s64) + ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p0) :: (store 16 into %ir.0 + 48, align 4) ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 - ; CHECK: [[GEP6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) - ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[GEP6]](p0) :: (load 16 from %ir.1 + 64, align 4) - ; CHECK: [[GEP7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) - ; CHECK: G_STORE [[LOAD4]](s128), [[GEP7]](p0) :: (store 16 into %ir.0 + 64, align 4) + ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C3]](s64) + ; CHECK: [[LOAD4:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD6]](p0) :: (load 16 from %ir.1 + 64, align 4) + ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64) + ; CHECK: G_STORE [[LOAD4]](s128), [[PTR_ADD7]](p0) :: (store 16 into %ir.0 + 64, align 4) ; CHECK: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 80 - ; CHECK: [[GEP8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) - ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[GEP8]](p0) :: (load 16 from %ir.1 + 80, align 4) - ; CHECK: [[GEP9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) - ; CHECK: G_STORE [[LOAD5]](s128), [[GEP9]](p0) :: (store 16 into %ir.0 + 80, align 4) + ; CHECK: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C4]](s64) + ; CHECK: [[LOAD5:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD8]](p0) :: (load 16 from %ir.1 + 80, align 4) + ; CHECK: [[PTR_ADD9:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64) + ; CHECK: G_STORE [[LOAD5]](s128), [[PTR_ADD9]](p0) :: (store 16 into %ir.0 + 80, align 4) ; CHECK: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 96 - ; CHECK: [[GEP10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64) - ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[GEP10]](p0) :: (load 16 from %ir.1 + 96, align 4) - ; CHECK: [[GEP11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) - ; CHECK: G_STORE [[LOAD6]](s128), [[GEP11]](p0) :: (store 16 into %ir.0 + 96, align 4) + ; CHECK: [[PTR_ADD10:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C5]](s64) + ; CHECK: [[LOAD6:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD10]](p0) :: (load 16 from %ir.1 + 96, align 4) + ; CHECK: [[PTR_ADD11:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s64) + ; CHECK: G_STORE [[LOAD6]](s128), [[PTR_ADD11]](p0) :: (store 16 into %ir.0 + 96, align 4) ; CHECK: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 112 - ; CHECK: [[GEP12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64) - ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[GEP12]](p0) :: (load 16 from %ir.1 + 112, align 4) - ; CHECK: [[GEP13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) - ; CHECK: G_STORE [[LOAD7]](s128), [[GEP13]](p0) :: (store 16 into %ir.0 + 112, align 4) + ; CHECK: [[PTR_ADD12:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C6]](s64) + ; CHECK: [[LOAD7:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD12]](p0) :: (load 16 from %ir.1 + 112, align 4) + ; CHECK: [[PTR_ADD13:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C6]](s64) + ; CHECK: G_STORE [[LOAD7]](s128), [[PTR_ADD13]](p0) :: (store 16 into %ir.0 + 112, align 4) ; CHECK: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 127 - ; CHECK: [[GEP14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64) - ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[GEP14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4) - ; CHECK: [[GEP15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) - ; CHECK: G_STORE [[LOAD8]](s128), [[GEP15]](p0) :: (store 16 into %ir.0 + 127, align 1, basealign 4) + ; CHECK: [[PTR_ADD14:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C7]](s64) + ; CHECK: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD14]](p0) :: (load 16 from %ir.1 + 127, align 1, basealign 4) + ; CHECK: [[PTR_ADD15:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C7]](s64) + ; CHECK: G_STORE [[LOAD8]](s128), [[PTR_ADD15]](p0) :: (store 16 into %ir.0 + 127, align 1, basealign 4) ; CHECK: RET_ReallyLR %0:_(p0) = COPY $x0 %1:_(p0) = COPY $x1 %2:_(s64) = G_CONSTANT i64 143 - G_MEMCPY %0(p0), %1(p0), %2(s64), 1 :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + G_MEMCPY_INLINE %0(p0), %1(p0), %2(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) RET_ReallyLR ... diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-memcpy-inline.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -mtriple=aarch64-unknown-unknown -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=irtranslator %s -o - | FileCheck %s + +define void @copy(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: copy + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + ret void +} + +define void @inline_copy(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: inline_copy + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + ret void +} + +define void @copy_volatile(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: copy_volatile + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 0 :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + ret void +} + +define void @inline_copy_volatile(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: inline_copy_volatile + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + ret void +} + +define void @tail_copy(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: tail_copy + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + ret void +} + +define void @tail_inline_copy(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: tail_inline_copy + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (store 1 into %ir.dst), (load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 false) + ret void +} + +define void @tail_copy_volatile(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: tail_copy_volatile + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64), 1 :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + ret void +} + +define void @tail_inline_copy_volatile(i8* %dst, i8* %src) { + ; CHECK-LABEL: name: tail_inline_copy_volatile + ; CHECK: bb.1.entry: + ; CHECK: liveins: $x0, $x1 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $x0 + ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 + ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[C]](s32) + ; CHECK: G_MEMCPY_INLINE [[COPY]](p0), [[COPY1]](p0), [[ZEXT]](s64) :: (volatile store 1 into %ir.dst), (volatile load 1 from %ir.src) + ; CHECK: RET_ReallyLR +entry: + tail call void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* %dst, i8* %src, i32 4, i1 true) + ret void +} + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) nounwind +declare void @llvm.memcpy.inline.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) nounwind diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -629,6 +629,9 @@ # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected +# DEBUG-NEXT: G_MEMCPY_INLINE (opcode 219): 3 type indices, 0 imm indices +# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_MEMMOVE (opcode {{[0-9]+}}): 3 type indices, 1 imm index # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-memcpy-inline.mir @@ -0,0 +1,81 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs -o - %s | FileCheck %s +--- | + target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7" + target triple = "amdgcn-amd-amdhsa" + + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0 + + define void @test_memcpy_inline(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #1 { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 13, i1 false) + ret void + } + + attributes #0 = { argmemonly nofree nounwind willreturn "target-cpu"="gfx900" } + attributes #1 = { "target-cpu"="gfx900" } + +... +--- +name: test_memcpy_inline +alignment: 1 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: sgpr_64 } + - { id: 3, class: _ } + - { id: 4, class: _ } + - { id: 5, class: _ } + - { id: 6, class: _ } + - { id: 7, class: _ } + - { id: 8, class: ccr_sgpr_64 } +liveins: + - { reg: '$sgpr30_sgpr31', virtual-reg: '%2' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: + maxKernArgAlign: 1 + scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3' + frameOffsetReg: '$sgpr33' + stackPtrOffsetReg: '$sgpr32' + argumentInfo: + privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' } + occupancy: 10 +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + + ; CHECK-LABEL: name: test_memcpy_inline + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $sgpr30_sgpr31 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31 + ; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[MV1]](p0) :: (load 8 from %ir.1, align 4) + ; CHECK: G_STORE [[LOAD]](s64), [[MV]](p0) :: (store 8 into %ir.0, align 4) + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 5 + ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[MV1]], [[C]](s64) + ; CHECK: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD]](p0) :: (load 8 from %ir.1 + 5, align 1, basealign 4) + ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[MV]], [[C]](s64) + ; CHECK: G_STORE [[LOAD1]](s64), [[PTR_ADD1]](p0) :: (store 8 into %ir.0 + 5, align 1, basealign 4) + ; CHECK: [[COPY5:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY4]] + ; CHECK: S_SETPC_B64_return [[COPY5]] + %3:_(s32) = COPY $vgpr0 + %4:_(s32) = COPY $vgpr1 + %0:_(p0) = G_MERGE_VALUES %3(s32), %4(s32) + %5:_(s32) = COPY $vgpr2 + %6:_(s32) = COPY $vgpr3 + %1:_(p0) = G_MERGE_VALUES %5(s32), %6(s32) + %2:sgpr_64 = COPY $sgpr30_sgpr31 + %7:_(s64) = G_CONSTANT i64 13 + G_MEMCPY_INLINE %0(p0), %1(p0), %7(s64) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + %8:ccr_sgpr_64 = COPY %2 + S_SETPC_B64_return %8 + +... diff --git a/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.mir @@ -0,0 +1,60 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=mipsel-linux-gnu -run-pass=mips-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s -check-prefixes=MIPS32 +--- | + ; ModuleID = '../llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.ll' + source_filename = "../llvm/test/CodeGen/Mips/GlobalISel/mips-prelegalizer-combiner/inline-memcpy.ll" + target datalayout = "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64" + target triple = "mipsel-pc-linux-gnu" + + declare void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64 immarg, i1 immarg) #0 + + define void @test_memcpy_inline(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr { + entry: + %0 = bitcast i32* %dst to i8* + %1 = bitcast i32* %src to i8* + tail call void @llvm.memcpy.inline.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 2, i1 false) + ret void + } + + attributes #0 = { argmemonly nofree nounwind willreturn } + +... +--- +name: test_memcpy_inline +alignment: 4 +tracksRegLiveness: true +registers: + - { id: 0, class: _ } + - { id: 1, class: _ } + - { id: 2, class: _ } + - { id: 3, class: _ } +liveins: + - { reg: '$a0' } + - { reg: '$a1' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + bb.1.entry: + liveins: $a0, $a1 + + ; MIPS32-LABEL: name: test_memcpy_inline + ; MIPS32: liveins: $a0, $a1 + ; MIPS32: [[COPY:%[0-9]+]]:_(p0) = COPY $a0 + ; MIPS32: [[COPY1:%[0-9]+]]:_(p0) = COPY $a1 + ; MIPS32: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[COPY1]](p0) :: (load 1 from %ir.1, align 4) + ; MIPS32: G_STORE [[LOAD]](s8), [[COPY]](p0) :: (store 1 into %ir.0, align 4) + ; MIPS32: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; MIPS32: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY1]], [[C]](s32) + ; MIPS32: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD]](p0) :: (load 1 from %ir.1 + 1, basealign 4) + ; MIPS32: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C]](s32) + ; MIPS32: G_STORE [[LOAD1]](s8), [[PTR_ADD1]](p0) :: (store 1 into %ir.0 + 1, basealign 4) + ; MIPS32: RetRA + %0:_(p0) = COPY $a0 + %1:_(p0) = COPY $a1 + %2:_(s64) = G_CONSTANT i64 2 + %3:_(s32) = G_TRUNC %2(s64) + G_MEMCPY_INLINE %0(p0), %1(p0), %3(s32) :: (store 1 into %ir.0, align 4), (load 1 from %ir.1, align 4) + RetRA + +... diff --git a/llvm/test/MachineVerifier/test_g_bzero.mir b/llvm/test/MachineVerifier/test_g_bzero.mir --- a/llvm/test/MachineVerifier/test_g_bzero.mir +++ b/llvm/test/MachineVerifier/test_g_bzero.mir @@ -27,7 +27,9 @@ ; CHECK: *** Bad machine code: inconsistent bzero address space *** G_BZERO %ptr, %cst2, 0 :: (store 4, addrspace 1) - ; CHECK: *** Bad machine code: bzero operand must be a pointer *** + ; CHECK: *** Bad machine code: bzero operand must be a pointer *** G_BZERO %cst1, %cst2, 0 :: (store 4) + ; CHECK: *** Bad machine code: 'tail' flag (last operand) must be an immediate 0 or 1 *** + G_BZERO %ptr, %cst2, 2 :: (store 4) ... diff --git a/llvm/test/MachineVerifier/test_g_memcpy.mir b/llvm/test/MachineVerifier/test_g_memcpy.mir --- a/llvm/test/MachineVerifier/test_g_memcpy.mir +++ b/llvm/test/MachineVerifier/test_g_memcpy.mir @@ -47,4 +47,9 @@ ; CHECK: *** Bad machine code: memory instruction operand must be a pointer *** G_MEMCPY %0, %2, %2, 0 :: (store 4), (load 4) + ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 *** + G_MEMCPY %0, %0, %2, %0 :: (store 4), (load 4) + + ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 *** + G_MEMCPY %0, %0, %2, 2 :: (store 4), (load 4) ... diff --git a/llvm/test/MachineVerifier/test_g_memcpy_inline.mir b/llvm/test/MachineVerifier/test_g_memcpy_inline.mir new file mode 100644 --- /dev/null +++ b/llvm/test/MachineVerifier/test_g_memcpy_inline.mir @@ -0,0 +1,49 @@ +#RUN: not --crash llc -o - -march=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s +# REQUIRES: aarch64-registered-target +--- +name: test_memcpy_inline +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: +body: | + bb.0: + + %0:_(p0) = G_CONSTANT i64 0 + %1:_(p0) = G_CONSTANT i64 4 + %2:_(s64) = G_CONSTANT i64 4 + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMCPY_INLINE %0, %1, %2 + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMCPY_INLINE %0, %1, %2 :: (load 4) + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMCPY_INLINE %0, %1, %2 :: (store 4) + + ; CHECK: *** Bad machine code: wrong memory operand types *** + G_MEMCPY_INLINE %0, %1, %2 :: (load 4), (store 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMCPY_INLINE %0, %1, %2 :: (store 8), (load 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMCPY_INLINE %0, %1, %2 :: (store unknown-size), (load 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMCPY_INLINE %0, %1, %2 :: (store 8), (load unknown-size) + + ; CHECK: *** Bad machine code: inconsistent store address space *** + G_MEMCPY_INLINE %0, %1, %2 :: (store 4, addrspace 1), (load 4) + + ; CHECK: *** Bad machine code: inconsistent load address space *** + G_MEMCPY_INLINE %0, %1, %2 :: (store 4), (load 4, addrspace 1) + + ; CHECK: *** Bad machine code: memory instruction operand must be a pointer *** + G_MEMCPY_INLINE %2, %0, %2 :: (store 4), (load 4) + + ; CHECK: *** Bad machine code: memory instruction operand must be a pointer *** + G_MEMCPY_INLINE %0, %2, %2 :: (store 4), (load 4) +... diff --git a/llvm/test/MachineVerifier/test_g_memmove.mir b/llvm/test/MachineVerifier/test_g_memmove.mir new file mode 100644 --- /dev/null +++ b/llvm/test/MachineVerifier/test_g_memmove.mir @@ -0,0 +1,55 @@ +#RUN: not --crash llc -o - -march=arm64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s +# REQUIRES: aarch64-registered-target +--- +name: test_memmove +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: +body: | + bb.0: + + %0:_(p0) = G_CONSTANT i64 0 + %1:_(p0) = G_CONSTANT i64 4 + %2:_(s64) = G_CONSTANT i64 4 + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMMOVE %0, %1, %2, 0 + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMMOVE %0, %1, %2, 0 :: (load 4) + + ; CHECK: *** Bad machine code: memcpy/memmove must have 2 memory operands *** + G_MEMMOVE %0, %1, %2, 0 :: (store 4) + + ; CHECK: *** Bad machine code: wrong memory operand types *** + G_MEMMOVE %0, %1, %2, 0 :: (load 4), (store 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMMOVE %0, %1, %2, 0 :: (store 8), (load 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMMOVE %0, %1, %2, 0 :: (store unknown-size), (load 4) + + ; CHECK: *** Bad machine code: inconsistent memory operand sizes *** + G_MEMMOVE %0, %1, %2, 0 :: (store 8), (load unknown-size) + + ; CHECK: *** Bad machine code: inconsistent store address space *** + G_MEMMOVE %0, %1, %2, 0 :: (store 4, addrspace 1), (load 4) + + ; CHECK: *** Bad machine code: inconsistent load address space *** + G_MEMMOVE %0, %1, %2, 0 :: (store 4), (load 4, addrspace 1) + + ; CHECK: *** Bad machine code: memory instruction operand must be a pointer *** + G_MEMMOVE %2, %0, %2, 0 :: (store 4), (load 4) + + ; CHECK: *** Bad machine code: memory instruction operand must be a pointer *** + G_MEMMOVE %0, %2, %2, 0 :: (store 4), (load 4) + + ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 *** + G_MEMMOVE %0, %0, %2, %0 :: (store 4), (load 4) + + ; CHECK: *** Bad machine code: 'tail' flag (operand 3) must be an immediate 0 or 1 *** + G_MEMMOVE %0, %0, %2, 2 :: (store 4), (load 4) +...