diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -110,6 +110,13 @@ return 0; } +static bool forwardCopyWillClobberTuple(unsigned DstReg, unsigned SrcReg, + unsigned NumRegs) { + // True iff DstReg is less than NumRegs encodings above SrcReg, modulo 32. + // Masking with 0x1f turns the wrapped unsigned difference into that offset. + return ((DstReg - SrcReg) & 0x1f) < NumRegs; +} + void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, MCRegister DstReg, @@ -124,6 +131,9 @@ // FPR->FPR copies and VR->VR copies. unsigned Opc; bool IsScalableVector = false; + unsigned NF = 1; + unsigned LMul = 1; + unsigned SubRegIdx = RISCV::sub_vrm1_0; if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) Opc = RISCV::FSGNJ_H; else if (RISCV::FPR32RegClass.contains(DstReg, SrcReg)) @@ -142,10 +152,83 @@ } else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) { Opc = RISCV::PseudoVMV8R_V; IsScalableVector = true; - } else + } else if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV1R_V; + SubRegIdx = RISCV::sub_vrm1_0; + NF = 2; + LMul = 1; + } else if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV2R_V; + SubRegIdx = RISCV::sub_vrm2_0; + NF = 2; + LMul = 2; + } else if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV4R_V; + SubRegIdx = RISCV::sub_vrm4_0; + NF = 2; + LMul = 4; + } else if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV1R_V; + SubRegIdx = RISCV::sub_vrm1_0; + NF = 3; + LMul = 1; + } else if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV2R_V; + SubRegIdx = RISCV::sub_vrm2_0; + NF = 3; + LMul = 2; + } else if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV1R_V; + SubRegIdx = RISCV::sub_vrm1_0; + NF = 4; + LMul = 1; + } else if 
(RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV2R_V; + SubRegIdx = RISCV::sub_vrm2_0; + NF = 4; + LMul = 2; + } else if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV1R_V; + SubRegIdx = RISCV::sub_vrm1_0; + NF = 5; + LMul = 1; + } else if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV1R_V; + SubRegIdx = RISCV::sub_vrm1_0; + NF = 6; + LMul = 1; + } else if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV1R_V; + SubRegIdx = RISCV::sub_vrm1_0; + NF = 7; + LMul = 1; + } else if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) { + Opc = RISCV::PseudoVMV1R_V; + SubRegIdx = RISCV::sub_vrm1_0; + NF = 8; + LMul = 1; + } else { llvm_unreachable("Impossible reg-to-reg copy"); + } - if (IsScalableVector) + if (NF > 1) { + const TargetRegisterInfo *TRI = STI.getRegisterInfo(); + + int I = 0, End = NF, Incr = 1; + unsigned SrcEncoding = TRI->getEncodingValue(SrcReg); + unsigned DstEncoding = TRI->getEncodingValue(DstReg); + if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMul)) { + I = NF - 1; + End = -1; + Incr = -1; + } + + for (; I != End; I += Incr) { + BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I)) + .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I), + getKillRegState(KillSrc)); + } + } else if (IsScalableVector) BuildMI(MBB, MBBI, DL, get(Opc), DstReg) .addReg(SrcReg, getKillRegState(KillSrc)); else diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-copy.mir @@ -0,0 +1,282 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -verify-machineinstrs -mtriple riscv64 -run-pass=postrapseudos %s -o - | FileCheck %s + +... 
+--- +name: copy_zvlsseg_N2 +body: | + bb.0: + ; CHECK-LABEL: name: copy_zvlsseg_N2 + ; CHECK: $v2 = PseudoVMV1R_V $v4 + ; CHECK: $v3 = PseudoVMV1R_V $v5 + ; CHECK: $v3 = PseudoVMV1R_V $v4 + ; CHECK: $v4 = PseudoVMV1R_V $v5 + ; CHECK: $v6 = PseudoVMV1R_V $v5 + ; CHECK: $v5 = PseudoVMV1R_V $v4 + ; CHECK: $v6 = PseudoVMV1R_V $v4 + ; CHECK: $v7 = PseudoVMV1R_V $v5 + ; CHECK: $v0m2 = PseudoVMV2R_V $v4m2 + ; CHECK: $v2m2 = PseudoVMV2R_V $v6m2 + ; CHECK: $v2m2 = PseudoVMV2R_V $v4m2 + ; CHECK: $v4m2 = PseudoVMV2R_V $v6m2 + ; CHECK: $v8m2 = PseudoVMV2R_V $v6m2 + ; CHECK: $v6m2 = PseudoVMV2R_V $v4m2 + ; CHECK: $v8m2 = PseudoVMV2R_V $v4m2 + ; CHECK: $v10m2 = PseudoVMV2R_V $v6m2 + ; CHECK: $v0m4 = PseudoVMV4R_V $v8m4 + ; CHECK: $v4m4 = PseudoVMV4R_V $v12m4 + ; CHECK: $v4m4 = PseudoVMV4R_V $v8m4 + ; CHECK: $v8m4 = PseudoVMV4R_V $v12m4 + ; CHECK: $v16m4 = PseudoVMV4R_V $v12m4 + ; CHECK: $v12m4 = PseudoVMV4R_V $v8m4 + ; CHECK: $v16m4 = PseudoVMV4R_V $v8m4 + ; CHECK: $v20m4 = PseudoVMV4R_V $v12m4 + $v2_v3 = COPY $v4_v5 + $v3_v4 = COPY $v4_v5 + $v5_v6 = COPY $v4_v5 + $v6_v7 = COPY $v4_v5 + + $v0m2_v2m2 = COPY $v4m2_v6m2 + $v2m2_v4m2 = COPY $v4m2_v6m2 + $v6m2_v8m2 = COPY $v4m2_v6m2 + $v8m2_v10m2 = COPY $v4m2_v6m2 + + $v0m4_v4m4 = COPY $v8m4_v12m4 + $v4m4_v8m4 = COPY $v8m4_v12m4 + $v12m4_v16m4 = COPY $v8m4_v12m4 + $v16m4_v20m4 = COPY $v8m4_v12m4 +... 
+--- +name: copy_zvlsseg_N3 +body: | + bb.0: + ; CHECK-LABEL: name: copy_zvlsseg_N3 + ; CHECK: $v2 = PseudoVMV1R_V $v5 + ; CHECK: $v3 = PseudoVMV1R_V $v6 + ; CHECK: $v4 = PseudoVMV1R_V $v7 + ; CHECK: $v3 = PseudoVMV1R_V $v5 + ; CHECK: $v4 = PseudoVMV1R_V $v6 + ; CHECK: $v5 = PseudoVMV1R_V $v7 + ; CHECK: $v4 = PseudoVMV1R_V $v5 + ; CHECK: $v5 = PseudoVMV1R_V $v6 + ; CHECK: $v6 = PseudoVMV1R_V $v7 + ; CHECK: $v9 = PseudoVMV1R_V $v7 + ; CHECK: $v8 = PseudoVMV1R_V $v6 + ; CHECK: $v7 = PseudoVMV1R_V $v5 + ; CHECK: $v9 = PseudoVMV1R_V $v5 + ; CHECK: $v10 = PseudoVMV1R_V $v6 + ; CHECK: $v11 = PseudoVMV1R_V $v7 + ; CHECK: $v0m2 = PseudoVMV2R_V $v6m2 + ; CHECK: $v2m2 = PseudoVMV2R_V $v8m2 + ; CHECK: $v4m2 = PseudoVMV2R_V $v10m2 + ; CHECK: $v2m2 = PseudoVMV2R_V $v6m2 + ; CHECK: $v4m2 = PseudoVMV2R_V $v8m2 + ; CHECK: $v6m2 = PseudoVMV2R_V $v10m2 + ; CHECK: $v14m2 = PseudoVMV2R_V $v10m2 + ; CHECK: $v12m2 = PseudoVMV2R_V $v8m2 + ; CHECK: $v10m2 = PseudoVMV2R_V $v6m2 + ; CHECK: $v12m2 = PseudoVMV2R_V $v6m2 + ; CHECK: $v14m2 = PseudoVMV2R_V $v8m2 + ; CHECK: $v16m2 = PseudoVMV2R_V $v10m2 + $v2_v3_v4 = COPY $v5_v6_v7 + $v3_v4_v5 = COPY $v5_v6_v7 + $v4_v5_v6 = COPY $v5_v6_v7 + $v7_v8_v9 = COPY $v5_v6_v7 + $v9_v10_v11 = COPY $v5_v6_v7 + + $v0m2_v2m2_v4m2 = COPY $v6m2_v8m2_v10m2 + $v2m2_v4m2_v6m2 = COPY $v6m2_v8m2_v10m2 + $v10m2_v12m2_v14m2 = COPY $v6m2_v8m2_v10m2 + $v12m2_v14m2_v16m2 = COPY $v6m2_v8m2_v10m2 +... 
+--- +name: copy_zvlsseg_N4 +body: | + bb.0: + ; CHECK-LABEL: name: copy_zvlsseg_N4 + ; CHECK: $v6 = PseudoVMV1R_V $v10 + ; CHECK: $v7 = PseudoVMV1R_V $v11 + ; CHECK: $v8 = PseudoVMV1R_V $v12 + ; CHECK: $v9 = PseudoVMV1R_V $v13 + ; CHECK: $v7 = PseudoVMV1R_V $v10 + ; CHECK: $v8 = PseudoVMV1R_V $v11 + ; CHECK: $v9 = PseudoVMV1R_V $v12 + ; CHECK: $v10 = PseudoVMV1R_V $v13 + ; CHECK: $v16 = PseudoVMV1R_V $v13 + ; CHECK: $v15 = PseudoVMV1R_V $v12 + ; CHECK: $v14 = PseudoVMV1R_V $v11 + ; CHECK: $v13 = PseudoVMV1R_V $v10 + ; CHECK: $v14 = PseudoVMV1R_V $v10 + ; CHECK: $v15 = PseudoVMV1R_V $v11 + ; CHECK: $v16 = PseudoVMV1R_V $v12 + ; CHECK: $v17 = PseudoVMV1R_V $v13 + ; CHECK: $v2m2 = PseudoVMV2R_V $v10m2 + ; CHECK: $v4m2 = PseudoVMV2R_V $v12m2 + ; CHECK: $v6m2 = PseudoVMV2R_V $v14m2 + ; CHECK: $v8m2 = PseudoVMV2R_V $v16m2 + ; CHECK: $v4m2 = PseudoVMV2R_V $v10m2 + ; CHECK: $v6m2 = PseudoVMV2R_V $v12m2 + ; CHECK: $v8m2 = PseudoVMV2R_V $v14m2 + ; CHECK: $v10m2 = PseudoVMV2R_V $v16m2 + ; CHECK: $v22m2 = PseudoVMV2R_V $v16m2 + ; CHECK: $v20m2 = PseudoVMV2R_V $v14m2 + ; CHECK: $v18m2 = PseudoVMV2R_V $v12m2 + ; CHECK: $v16m2 = PseudoVMV2R_V $v10m2 + ; CHECK: $v18m2 = PseudoVMV2R_V $v10m2 + ; CHECK: $v20m2 = PseudoVMV2R_V $v12m2 + ; CHECK: $v22m2 = PseudoVMV2R_V $v14m2 + ; CHECK: $v24m2 = PseudoVMV2R_V $v16m2 + $v6_v7_v8_v9 = COPY $v10_v11_v12_v13 + $v7_v8_v9_v10 = COPY $v10_v11_v12_v13 + $v13_v14_v15_v16 = COPY $v10_v11_v12_v13 + $v14_v15_v16_v17 = COPY $v10_v11_v12_v13 + + $v2m2_v4m2_v6m2_v8m2 = COPY $v10m2_v12m2_v14m2_v16m2 + $v4m2_v6m2_v8m2_v10m2 = COPY $v10m2_v12m2_v14m2_v16m2 + $v16m2_v18m2_v20m2_v22m2 = COPY $v10m2_v12m2_v14m2_v16m2 + $v18m2_v20m2_v22m2_v24m2 = COPY $v10m2_v12m2_v14m2_v16m2 +... 
+--- +name: copy_zvlsseg_N5 +body: | + bb.0: + ; CHECK-LABEL: name: copy_zvlsseg_N5 + ; CHECK: $v5 = PseudoVMV1R_V $v10 + ; CHECK: $v6 = PseudoVMV1R_V $v11 + ; CHECK: $v7 = PseudoVMV1R_V $v12 + ; CHECK: $v8 = PseudoVMV1R_V $v13 + ; CHECK: $v9 = PseudoVMV1R_V $v14 + ; CHECK: $v6 = PseudoVMV1R_V $v10 + ; CHECK: $v7 = PseudoVMV1R_V $v11 + ; CHECK: $v8 = PseudoVMV1R_V $v12 + ; CHECK: $v9 = PseudoVMV1R_V $v13 + ; CHECK: $v10 = PseudoVMV1R_V $v14 + ; CHECK: $v18 = PseudoVMV1R_V $v14 + ; CHECK: $v17 = PseudoVMV1R_V $v13 + ; CHECK: $v16 = PseudoVMV1R_V $v12 + ; CHECK: $v15 = PseudoVMV1R_V $v11 + ; CHECK: $v14 = PseudoVMV1R_V $v10 + ; CHECK: $v15 = PseudoVMV1R_V $v10 + ; CHECK: $v16 = PseudoVMV1R_V $v11 + ; CHECK: $v17 = PseudoVMV1R_V $v12 + ; CHECK: $v18 = PseudoVMV1R_V $v13 + ; CHECK: $v19 = PseudoVMV1R_V $v14 + $v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14 + $v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14 + $v14_v15_v16_v17_v18 = COPY $v10_v11_v12_v13_v14 + $v15_v16_v17_v18_v19 = COPY $v10_v11_v12_v13_v14 +... 
+--- +name: copy_zvlsseg_N6 +body: | + bb.0: + ; CHECK-LABEL: name: copy_zvlsseg_N6 + ; CHECK: $v4 = PseudoVMV1R_V $v10 + ; CHECK: $v5 = PseudoVMV1R_V $v11 + ; CHECK: $v6 = PseudoVMV1R_V $v12 + ; CHECK: $v7 = PseudoVMV1R_V $v13 + ; CHECK: $v8 = PseudoVMV1R_V $v14 + ; CHECK: $v9 = PseudoVMV1R_V $v15 + ; CHECK: $v5 = PseudoVMV1R_V $v10 + ; CHECK: $v6 = PseudoVMV1R_V $v11 + ; CHECK: $v7 = PseudoVMV1R_V $v12 + ; CHECK: $v8 = PseudoVMV1R_V $v13 + ; CHECK: $v9 = PseudoVMV1R_V $v14 + ; CHECK: $v10 = PseudoVMV1R_V $v15 + ; CHECK: $v20 = PseudoVMV1R_V $v15 + ; CHECK: $v19 = PseudoVMV1R_V $v14 + ; CHECK: $v18 = PseudoVMV1R_V $v13 + ; CHECK: $v17 = PseudoVMV1R_V $v12 + ; CHECK: $v16 = PseudoVMV1R_V $v11 + ; CHECK: $v15 = PseudoVMV1R_V $v10 + ; CHECK: $v16 = PseudoVMV1R_V $v10 + ; CHECK: $v17 = PseudoVMV1R_V $v11 + ; CHECK: $v18 = PseudoVMV1R_V $v12 + ; CHECK: $v19 = PseudoVMV1R_V $v13 + ; CHECK: $v20 = PseudoVMV1R_V $v14 + ; CHECK: $v21 = PseudoVMV1R_V $v15 + $v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15 + $v5_v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14_v15 + $v15_v16_v17_v18_v19_v20 = COPY $v10_v11_v12_v13_v14_v15 + $v16_v17_v18_v19_v20_v21 = COPY $v10_v11_v12_v13_v14_v15 +... 
+--- +name: copy_zvlsseg_N7 +body: | + bb.0: + ; CHECK-LABEL: name: copy_zvlsseg_N7 + ; CHECK: $v3 = PseudoVMV1R_V $v10 + ; CHECK: $v4 = PseudoVMV1R_V $v11 + ; CHECK: $v5 = PseudoVMV1R_V $v12 + ; CHECK: $v6 = PseudoVMV1R_V $v13 + ; CHECK: $v7 = PseudoVMV1R_V $v14 + ; CHECK: $v8 = PseudoVMV1R_V $v15 + ; CHECK: $v9 = PseudoVMV1R_V $v16 + ; CHECK: $v4 = PseudoVMV1R_V $v10 + ; CHECK: $v5 = PseudoVMV1R_V $v11 + ; CHECK: $v6 = PseudoVMV1R_V $v12 + ; CHECK: $v7 = PseudoVMV1R_V $v13 + ; CHECK: $v8 = PseudoVMV1R_V $v14 + ; CHECK: $v9 = PseudoVMV1R_V $v15 + ; CHECK: $v10 = PseudoVMV1R_V $v16 + ; CHECK: $v22 = PseudoVMV1R_V $v16 + ; CHECK: $v21 = PseudoVMV1R_V $v15 + ; CHECK: $v20 = PseudoVMV1R_V $v14 + ; CHECK: $v19 = PseudoVMV1R_V $v13 + ; CHECK: $v18 = PseudoVMV1R_V $v12 + ; CHECK: $v17 = PseudoVMV1R_V $v11 + ; CHECK: $v16 = PseudoVMV1R_V $v10 + ; CHECK: $v17 = PseudoVMV1R_V $v10 + ; CHECK: $v18 = PseudoVMV1R_V $v11 + ; CHECK: $v19 = PseudoVMV1R_V $v12 + ; CHECK: $v20 = PseudoVMV1R_V $v13 + ; CHECK: $v21 = PseudoVMV1R_V $v14 + ; CHECK: $v22 = PseudoVMV1R_V $v15 + ; CHECK: $v23 = PseudoVMV1R_V $v16 + $v3_v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15_v16 + $v4_v5_v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14_v15_v16 + $v16_v17_v18_v19_v20_v21_v22 = COPY $v10_v11_v12_v13_v14_v15_v16 + $v17_v18_v19_v20_v21_v22_v23 = COPY $v10_v11_v12_v13_v14_v15_v16 +... 
+--- +name: copy_zvlsseg_N8 +body: | + bb.0: + ; CHECK-LABEL: name: copy_zvlsseg_N8 + ; CHECK: $v2 = PseudoVMV1R_V $v10 + ; CHECK: $v3 = PseudoVMV1R_V $v11 + ; CHECK: $v4 = PseudoVMV1R_V $v12 + ; CHECK: $v5 = PseudoVMV1R_V $v13 + ; CHECK: $v6 = PseudoVMV1R_V $v14 + ; CHECK: $v7 = PseudoVMV1R_V $v15 + ; CHECK: $v8 = PseudoVMV1R_V $v16 + ; CHECK: $v9 = PseudoVMV1R_V $v17 + ; CHECK: $v3 = PseudoVMV1R_V $v10 + ; CHECK: $v4 = PseudoVMV1R_V $v11 + ; CHECK: $v5 = PseudoVMV1R_V $v12 + ; CHECK: $v6 = PseudoVMV1R_V $v13 + ; CHECK: $v7 = PseudoVMV1R_V $v14 + ; CHECK: $v8 = PseudoVMV1R_V $v15 + ; CHECK: $v9 = PseudoVMV1R_V $v16 + ; CHECK: $v10 = PseudoVMV1R_V $v17 + ; CHECK: $v24 = PseudoVMV1R_V $v17 + ; CHECK: $v23 = PseudoVMV1R_V $v16 + ; CHECK: $v22 = PseudoVMV1R_V $v15 + ; CHECK: $v21 = PseudoVMV1R_V $v14 + ; CHECK: $v20 = PseudoVMV1R_V $v13 + ; CHECK: $v19 = PseudoVMV1R_V $v12 + ; CHECK: $v18 = PseudoVMV1R_V $v11 + ; CHECK: $v17 = PseudoVMV1R_V $v10 + ; CHECK: $v18 = PseudoVMV1R_V $v10 + ; CHECK: $v19 = PseudoVMV1R_V $v11 + ; CHECK: $v20 = PseudoVMV1R_V $v12 + ; CHECK: $v21 = PseudoVMV1R_V $v13 + ; CHECK: $v22 = PseudoVMV1R_V $v14 + ; CHECK: $v23 = PseudoVMV1R_V $v15 + ; CHECK: $v24 = PseudoVMV1R_V $v16 + ; CHECK: $v25 = PseudoVMV1R_V $v17 + $v2_v3_v4_v5_v6_v7_v8_v9 = COPY $v10_v11_v12_v13_v14_v15_v16_v17 + $v3_v4_v5_v6_v7_v8_v9_v10 = COPY $v10_v11_v12_v13_v14_v15_v16_v17 + $v17_v18_v19_v20_v21_v22_v23_v24 = COPY $v10_v11_v12_v13_v14_v15_v16_v17 + $v18_v19_v20_v21_v22_v23_v24_v25 = COPY $v10_v11_v12_v13_v14_v15_v16_v17 +...