diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2744,6 +2744,35 @@
     return;
   }
 
+  // Copy an SVE Z register pair (ZPR2): one ORR_ZZZ move per sub-register.
+  if (AArch64::ZPR2RegClass.contains(DestReg) &&
+      AArch64::ZPR2RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
+  // Copy an SVE Z register triple (ZPR3): one ORR_ZZZ move per sub-register.
+  if (AArch64::ZPR3RegClass.contains(DestReg) &&
+      AArch64::ZPR3RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+                                       AArch64::zsub2};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
+  // Copy an SVE Z register quad (ZPR4): one ORR_ZZZ move per sub-register.
+  if (AArch64::ZPR4RegClass.contains(DestReg) &&
+      AArch64::ZPR4RegClass.contains(SrcReg)) {
+    static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
+                                       AArch64::zsub2, AArch64::zsub3};
+    copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
+                     Indices);
+    return;
+  }
+
   if (AArch64::GPR64spRegClass.contains(DestReg) &&
       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
diff --git a/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir b/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-copy-zprpair.mir
@@ -0,0 +1,78 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -run-pass=postrapseudos -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            copy_zpr2
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$z0_z1' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    liveins: $z0_z1
+    ; CHECK-LABEL: name: copy_zpr2
+    ; CHECK: liveins: $z0_z1
+    ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+    ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+    ; CHECK: RET_ReallyLR
+    $z1_z2 = COPY $z0_z1
+    $z0_z1 = COPY $z1_z2
+    RET_ReallyLR
+
+...
+---
+name:            copy_zpr3
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$z0_z1_z2' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    liveins: $z0_z1_z2
+    ; CHECK-LABEL: name: copy_zpr3
+    ; CHECK: liveins: $z0_z1_z2
+    ; CHECK: $z3 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+    ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z3, $z3
+    ; CHECK: RET_ReallyLR
+    $z1_z2_z3 = COPY $z0_z1_z2
+    $z0_z1_z2 = COPY $z1_z2_z3
+    RET_ReallyLR
+
+...
+---
+name:            copy_zpr4
+alignment:       4
+tracksRegLiveness: true
+liveins:
+  - { reg: '$z0_z1_z2_z3' }
+frameInfo:
+  maxCallFrameSize: 0
+body:             |
+  bb.0:
+    liveins: $z0_z1_z2_z3
+    ; CHECK-LABEL: name: copy_zpr4
+    ; CHECK: liveins: $z0_z1_z2_z3
+    ; CHECK: $z4 = ORR_ZZZ $z3, $z3
+    ; CHECK: $z3 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z0, $z0
+    ; CHECK: $z0 = ORR_ZZZ $z1, $z1
+    ; CHECK: $z1 = ORR_ZZZ $z2, $z2
+    ; CHECK: $z2 = ORR_ZZZ $z3, $z3
+    ; CHECK: $z3 = ORR_ZZZ $z4, $z4
+    ; CHECK: RET_ReallyLR
+    $z1_z2_z3_z4 = COPY $z0_z1_z2_z3
+    $z0_z1_z2_z3 = COPY $z1_z2_z3_z4
+    RET_ReallyLR
+
+...