diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -7782,7 +7782,6 @@
   // of that value loaded. This can result in a sequence of loads and stores
   // mixed types, depending on what the target specifies as good types to use.
   unsigned CurrOffset = 0;
-  LLT PtrTy = MRI.getType(Src);
   unsigned Size = KnownLen;
   for (auto CopyTy : MemOps) {
     // Issuing an unaligned load / store pair that overlaps with the previous
@@ -7800,15 +7799,20 @@
     Register LoadPtr = Src;
     Register Offset;
     if (CurrOffset != 0) {
-      Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
-                   .getReg(0);
-      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+      LLT LoadTy = MRI.getType(Src);
+      Offset =
+          MIB.buildConstant(LLT::scalar(LoadTy.getSizeInBits()), CurrOffset)
+              .getReg(0);
+      LoadPtr = MIB.buildPtrAdd(LoadTy, Src, Offset).getReg(0);
     }
     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
 
     // Create the store.
-    Register StorePtr =
-        CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+    Register StorePtr = Dst;
+    if (CurrOffset != 0) {
+      LLT StoreTy = MRI.getType(Dst);
+      StorePtr = MIB.buildPtrAdd(StoreTy, Dst, Offset).getReg(0);
+    }
     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
     CurrOffset += CopyTy.getSizeInBytes();
     Size -= CopyTy.getSizeInBytes();
@@ -7885,7 +7889,6 @@
   // Apart from that, this loop is pretty much doing the same thing as the
   // memcpy codegen function.
   unsigned CurrOffset = 0;
-  LLT PtrTy = MRI.getType(Src);
   SmallVector<Register, 16> LoadVals;
   for (auto CopyTy : MemOps) {
     // Construct MMO for the load.
@@ -7895,9 +7898,10 @@
     // Create the load.
     Register LoadPtr = Src;
     if (CurrOffset != 0) {
+      LLT LoadTy = MRI.getType(Src);
       auto Offset =
-          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
-      LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
+          MIB.buildConstant(LLT::scalar(LoadTy.getSizeInBits()), CurrOffset);
+      LoadPtr = MIB.buildPtrAdd(LoadTy, Src, Offset).getReg(0);
     }
     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
     CurrOffset += CopyTy.getSizeInBytes();
@@ -7912,9 +7916,10 @@
 
     Register StorePtr = Dst;
     if (CurrOffset != 0) {
+      LLT StoreTy = MRI.getType(Dst);
       auto Offset =
-          MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
-      StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
+          MIB.buildConstant(LLT::scalar(StoreTy.getSizeInBits()), CurrOffset);
+      StorePtr = MIB.buildPtrAdd(StoreTy, Dst, Offset).getReg(0);
     }
     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
     CurrOffset += CopyTy.getSizeInBytes();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memcpy.mir
@@ -13,6 +13,7 @@
   }
 
   declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1 immarg) #1
+  declare void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* nocapture writeonly, i8 addrspace(2)* nocapture readonly, i64, i1 immarg) #1
 
   define void @test_memcpy2_const(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 {
   entry:
@@ -46,6 +47,15 @@
     ret void
   }
 
+  define void @test_memcpy_addrspace(i32 addrspace(1)* nocapture %dst, i32 addrspace(2)* nocapture readonly %src) local_unnamed_addr #0 {
+  entry:
+    %0 = bitcast i32 addrspace(1)* %dst to i8 addrspace(1)*
+    %1 = bitcast i32 addrspace(2)* %src to i8 addrspace(2)*
+    tail call void @llvm.memcpy.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %0, i8 addrspace(2)* align 4 %1, i64 72, i1 false)
+    ret void
+  }
+
+
   attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2,+zcm,+zcz" "unsafe-fp-math"="false" "use-soft-float"="false" }
   attributes #1 = { argmemonly nounwind }
   attributes #2 = { optsize }
@@ -267,3 +277,51 @@
   RET_ReallyLR
 
 ...
+---
+name: test_memcpy_addrspace
+alignment: 4
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: _ }
+  - { id: 1, class: _ }
+  - { id: 2, class: _ }
+machineFunctionInfo: {}
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_memcpy_addrspace
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p2) = COPY $x1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2)
+    ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2)
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD1]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD2]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2)
+    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C1]](s64)
+    ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48
+    ; CHECK: [[PTR_ADD4:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C2]](s64)
+    ; CHECK: [[LOAD3:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD4]](p2) :: (load (s128) from %ir.1 + 48, align 4, addrspace 2)
+    ; CHECK: [[PTR_ADD5:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD3]](s128), [[PTR_ADD5]](p1) :: (store (s128) into %ir.0 + 48, align 4, addrspace 1)
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 64
+    ; CHECK: [[PTR_ADD6:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C3]](s64)
+    ; CHECK: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD6]](p2) :: (load (s64) from %ir.1 + 64, align 4, addrspace 2)
+    ; CHECK: [[PTR_ADD7:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK: G_STORE [[LOAD4]](s64), [[PTR_ADD7]](p1) :: (store (s64) into %ir.0 + 64, align 4, addrspace 1)
+    ; CHECK: RET_ReallyLR
+    %0:_(p1) = COPY $x0
+    %1:_(p2) = COPY $x1
+    %2:_(s64) = G_CONSTANT i64 72
+    G_MEMCPY %0(p1), %1(p2), %2(s64), 1 :: (store (s8) into %ir.0, align 4, addrspace 1), (load (s8) from %ir.1, align 4, addrspace 2)
+    RET_ReallyLR
+
+
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
--- a/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/inline-memmove.mir
@@ -13,6 +13,7 @@
   }
 
   declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1 immarg) #1
+  declare void @llvm.memmove.p1i8.p2i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(2)* nocapture readonly, i64, i1 immarg) #1
 
   define void @test_memmove2_const(i32* nocapture %dst, i32* nocapture readonly %src) local_unnamed_addr #0 {
   entry:
@@ -38,6 +39,14 @@
     ret void
   }
 
+  define void @test_memmove_addrspace(i32 addrspace(1)* nocapture %dst, i32 addrspace(2)* nocapture readonly %src) local_unnamed_addr #0 {
+  entry:
+    %0 = bitcast i32 addrspace(1)* %dst to i8 addrspace(1)*
+    %1 = bitcast i32 addrspace(2)* %src to i8 addrspace(2)*
+    tail call void @llvm.memmove.p1i8.p2i8.i64(i8 addrspace(1)* align 4 %0, i8 addrspace(2)* align 4 %1, i64 8, i1 false)
+    ret void
+  }
+
   attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cyclone" "target-features"="+aes,+crypto,+fp-armv8,+neon,+sha2,+zcm,+zcz" "unsafe-fp-math"="false" "use-soft-float"="false" }
   attributes #1 = { argmemonly nounwind }
 
@@ -160,3 +169,38 @@
   RET_ReallyLR
 
 ...
+---
+name: test_memmove_addrspace
+alignment: 4
+tracksRegLiveness: true
+body: |
+  bb.1.entry:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: test_memmove_addrspace
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $x0
+    ; CHECK: [[COPY1:%[0-9]+]]:_(p2) = COPY $x1
+    ; CHECK: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p2) :: (load (s128) from %ir.1, align 4, addrspace 2)
+    ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C]](s64)
+    ; CHECK: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD]](p2) :: (load (s128) from %ir.1 + 16, align 4, addrspace 2)
+    ; CHECK: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[PTR_ADD1:%[0-9]+]]:_(p2) = G_PTR_ADD [[COPY1]], [[C1]](s64)
+    ; CHECK: [[LOAD2:%[0-9]+]]:_(s128) = G_LOAD [[PTR_ADD1]](p2) :: (load (s128) from %ir.1 + 32, align 4, addrspace 2)
+    ; CHECK: G_STORE [[LOAD]](s128), [[COPY]](p1) :: (store (s128) into %ir.0, align 4, addrspace 1)
+    ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16
+    ; CHECK: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C2]](s64)
+    ; CHECK: G_STORE [[LOAD1]](s128), [[PTR_ADD2]](p1) :: (store (s128) into %ir.0 + 16, align 4, addrspace 1)
+    ; CHECK: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 32
+    ; CHECK: [[PTR_ADD3:%[0-9]+]]:_(p1) = G_PTR_ADD [[COPY]], [[C3]](s64)
+    ; CHECK: G_STORE [[LOAD2]](s128), [[PTR_ADD3]](p1) :: (store (s128) into %ir.0 + 32, align 4, addrspace 1)
+    ; CHECK: RET_ReallyLR
+    %0:_(p1) = COPY $x0
+    %1:_(p2) = COPY $x1
+    %2:_(s64) = G_CONSTANT i64 48
+    G_MEMMOVE %0(p1), %1(p2), %2(s64), 1 :: (store (s8) into %ir.0, align 4), (load (s8) from %ir.1, align 4)
+    RET_ReallyLR
+
+
+...