diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -35,6 +35,10 @@
 #define GET_INSTRINFO_CTOR_DTOR
 #include "RISCVGenInstrInfo.inc"
 
+static cl::opt<bool> PreferWholeRegisterMove(
+    "riscv-prefer-whole-register-move", cl::init(false), cl::Hidden,
+    cl::desc("Prefer whole register move for vector registers."));
+
 namespace llvm {
 namespace RISCVVPseudosTable {
 
@@ -117,6 +121,96 @@
   return ((DstReg - SrcReg) & 0x1f) < NumRegs;
 }
 
+static bool isConvertibleToVMV_V_V(const RISCVSubtarget &STI,
+                                   const MachineBasicBlock &MBB,
+                                   MachineBasicBlock::const_iterator MBBI,
+                                   MachineBasicBlock::const_iterator &DefMBBI,
+                                   RISCVII::VLMUL &LMul) {
+  if (PreferWholeRegisterMove)
+    return false;
+
+  assert(MBBI->getOpcode() == TargetOpcode::COPY &&
+         "Unexpected COPY instruction.");
+  Register SrcReg = MBBI->getOperand(1).getReg();
+  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+  bool FoundDef = false;
+  bool FirstVSetVLI = false;
+  while (MBBI != MBB.begin()) {
+    --MBBI;
+    if (MBBI->isMetaInstruction())
+      continue;
+
+    if (MBBI->getOpcode() == RISCV::PseudoVSETVLI ||
+        MBBI->getOpcode() == RISCV::PseudoVSETIVLI) {
+      // There is a vsetvli between the COPY and the source defining
+      // instruction:
+      //   vy = def_vop ...
+      //   ...
+      //   vsetvli
+      //   ...
+      //   vx = COPY vy
+      if (!FoundDef) {
+        if (!FirstVSetVLI) {
+          FirstVSetVLI = true;
+          RISCVII::VLMUL FirstLMul =
+              RISCVVType::getVLMUL(MBBI->getOperand(2).getImm());
+          // The first encountered vsetvli must have the same LMUL as the
+          // register class of the COPY.
+          if (FirstLMul != LMul)
+            return false;
+        }
+        // Only permit `vsetvli x0, x0, vtype` between the COPY and the source
+        // defining instruction.
+        if (MBBI->getOperand(0).getReg() != RISCV::X0)
+          return false;
+        if (MBBI->getOperand(1).isImm())
+          return false;
+        if (MBBI->getOperand(1).getReg() != RISCV::X0)
+          return false;
+        continue;
+      }
+
+      // MBBI is now the first vsetvli before the def_vop instruction, i.e.
+      // the one whose vtype was in effect when def_vop executed.
+      unsigned VType = MBBI->getOperand(2).getImm();
+      // If the vsetvli is tail undisturbed, keep the whole register move.
+      if (!RISCVVType::isTailAgnostic(VType))
+        return false;
+
+      // The check is conservative. We only have register classes for
+      // LMUL = 1/2/4/8, so we should also be able to convert vmv1r.v to
+      // vmv.v.v for fractional LMUL operations. However, we cannot reuse the
+      // vsetvli LMUL for widening operations, whose result is 2 x LMUL.
+      return LMul == RISCVVType::getVLMUL(VType);
+    } else if (MBBI->isInlineAsm() || MBBI->isCall()) {
+      assert(!FoundDef && "There must be no inline asm or calls between "
+                          "the source defining instruction and the vsetvli.");
+      return false;
+    } else if (MBBI->getNumDefs()) {
+      // Check all instructions that may change VL.
+      // For example, vleff has an implicit def of VL.
+      if (MBBI->modifiesRegister(RISCV::VL))
+        return false;
+
+      for (const MachineOperand &MO : MBBI->defs()) {
+        if (!FoundDef && TRI->isSubRegisterEq(MO.getReg(), SrcReg)) {
+          // Found the definition.
+          FoundDef = true;
+          DefMBBI = MBBI;
+          // If the producing instruction does not depend on vsetvli, do not
+          // convert the COPY to vmv.v.v. For example, VL1R_V.
+          uint64_t TSFlags = DefMBBI->getDesc().TSFlags;
+          if (!RISCVII::hasSEWOp(TSFlags))
+            return false;
+          break;
+        }
+      }
+    }
+  }
+
+  return false;
+}
+
 void RISCVInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MBBI,
                                  const DebugLoc &DL, MCRegister DstReg,
@@ -132,7 +226,7 @@
   unsigned Opc;
   bool IsScalableVector = true;
   unsigned NF = 1;
-  unsigned LMul = 1;
+  RISCVII::VLMUL LMul = RISCVII::LMUL_1;
   unsigned SubRegIdx = RISCV::sub_vrm1_0;
   if (RISCV::FPR16RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::FSGNJ_H;
@@ -145,91 +239,165 @@
     IsScalableVector = false;
   } else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV1R_V;
+    LMul = RISCVII::LMUL_1;
   } else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV2R_V;
+    LMul = RISCVII::LMUL_2;
   } else if (RISCV::VRM4RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV4R_V;
+    LMul = RISCVII::LMUL_4;
   } else if (RISCV::VRM8RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV8R_V;
+    LMul = RISCVII::LMUL_8;
   } else if (RISCV::VRN2M1RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV1R_V;
     SubRegIdx = RISCV::sub_vrm1_0;
     NF = 2;
-    LMul = 1;
+    LMul = RISCVII::LMUL_1;
   } else if (RISCV::VRN2M2RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV2R_V;
     SubRegIdx = RISCV::sub_vrm2_0;
     NF = 2;
-    LMul = 2;
+    LMul = RISCVII::LMUL_2;
   } else if (RISCV::VRN2M4RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV4R_V;
     SubRegIdx = RISCV::sub_vrm4_0;
     NF = 2;
-    LMul = 4;
+    LMul = RISCVII::LMUL_4;
   } else if (RISCV::VRN3M1RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV1R_V;
     SubRegIdx = RISCV::sub_vrm1_0;
     NF = 3;
-    LMul = 1;
+    LMul = RISCVII::LMUL_1;
   } else if (RISCV::VRN3M2RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV2R_V;
     SubRegIdx = RISCV::sub_vrm2_0;
     NF = 3;
-    LMul = 2;
+    LMul = RISCVII::LMUL_2;
   } else if (RISCV::VRN4M1RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV1R_V;
     SubRegIdx = RISCV::sub_vrm1_0;
     NF = 4;
-    LMul = 1;
+    LMul = RISCVII::LMUL_1;
   } else if (RISCV::VRN4M2RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV2R_V;
     SubRegIdx = RISCV::sub_vrm2_0;
     NF = 4;
-    LMul = 2;
+    LMul = RISCVII::LMUL_2;
   } else if (RISCV::VRN5M1RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV1R_V;
     SubRegIdx = RISCV::sub_vrm1_0;
     NF = 5;
-    LMul = 1;
+    LMul = RISCVII::LMUL_1;
   } else if (RISCV::VRN6M1RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV1R_V;
     SubRegIdx = RISCV::sub_vrm1_0;
     NF = 6;
-    LMul = 1;
+    LMul = RISCVII::LMUL_1;
   } else if (RISCV::VRN7M1RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV1R_V;
     SubRegIdx = RISCV::sub_vrm1_0;
     NF = 7;
-    LMul = 1;
+    LMul = RISCVII::LMUL_1;
   } else if (RISCV::VRN8M1RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::PseudoVMV1R_V;
     SubRegIdx = RISCV::sub_vrm1_0;
     NF = 8;
-    LMul = 1;
+    LMul = RISCVII::LMUL_1;
   } else {
     llvm_unreachable("Impossible reg-to-reg copy");
   }
 
   if (IsScalableVector) {
+    bool UseVMV_V_V = false;
+    MachineBasicBlock::const_iterator DefMBBI;
+    unsigned DefOpNum;
+    unsigned VIOpc;
+    if (isConvertibleToVMV_V_V(STI, MBB, MBBI, DefMBBI, LMul)) {
+      UseVMV_V_V = true;
+      DefOpNum = DefMBBI->getNumOperands();
+      switch (LMul) {
+      default:
+        llvm_unreachable("Impossible LMUL for vector register copy.");
+      case RISCVII::LMUL_F8:
+        Opc = RISCV::PseudoVMV_V_V_MF8;
+        VIOpc = RISCV::PseudoVMV_V_I_MF8;
+        break;
+      case RISCVII::LMUL_F4:
+        Opc = RISCV::PseudoVMV_V_V_MF4;
+        VIOpc = RISCV::PseudoVMV_V_I_MF4;
+        break;
+      case RISCVII::LMUL_F2:
+        Opc = RISCV::PseudoVMV_V_V_MF2;
+        VIOpc = RISCV::PseudoVMV_V_I_MF2;
+        break;
+      case RISCVII::LMUL_1:
+        Opc = RISCV::PseudoVMV_V_V_M1;
+        VIOpc = RISCV::PseudoVMV_V_I_M1;
+        break;
+      case RISCVII::LMUL_2:
+        Opc = RISCV::PseudoVMV_V_V_M2;
+        VIOpc = RISCV::PseudoVMV_V_I_M2;
+        break;
+      case RISCVII::LMUL_4:
+        Opc = RISCV::PseudoVMV_V_V_M4;
+        VIOpc = RISCV::PseudoVMV_V_I_M4;
+        break;
+      case RISCVII::LMUL_8:
+        Opc = RISCV::PseudoVMV_V_V_M8;
+        VIOpc = RISCV::PseudoVMV_V_I_M8;
+        break;
+      }
+    }
+
+    bool UseVMV_V_I = false;
+    if (UseVMV_V_V && (DefMBBI->getOpcode() == VIOpc)) {
+      UseVMV_V_I = true;
+      Opc = VIOpc;
+    }
+
     if (NF == 1) {
-      BuildMI(MBB, MBBI, DL, get(Opc), DstReg)
-          .addReg(SrcReg, getKillRegState(KillSrc));
+      auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), DstReg);
+      if (UseVMV_V_I)
+        MIB = MIB.add(DefMBBI->getOperand(1));
+      else
+        MIB = MIB.addReg(SrcReg, getKillRegState(KillSrc));
+      if (UseVMV_V_V) {
+        // The last two explicit arguments of the defining pseudo are AVL and
+        // SEW; also carry over its trailing implicit vl and vtype operands.
+        MIB.add(DefMBBI->getOperand(DefOpNum - 4)); // AVL
+        MIB.add(DefMBBI->getOperand(DefOpNum - 3)); // SEW
+        MIB.add(DefMBBI->getOperand(DefOpNum - 2)); // implicit vl
+        MIB.add(DefMBBI->getOperand(DefOpNum - 1)); // implicit vtype
+      }
     } else {
       const TargetRegisterInfo *TRI = STI.getRegisterInfo();
       int I = 0, End = NF, Incr = 1;
       unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
       unsigned DstEncoding = TRI->getEncodingValue(DstReg);
-      if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMul)) {
+      // LMUL_1/2/4/8 encode as 0/1/2/3, so the shift recovers the number of
+      // registers per field; fractional LMUL cannot occur here.
+      unsigned LMulBits = static_cast<unsigned>(LMul);
+      unsigned LMulVal = 1 << (LMulBits < 4 ? LMulBits : 0);
+      if (forwardCopyWillClobberTuple(DstEncoding, SrcEncoding, NF * LMulVal)) {
         I = NF - 1;
         End = -1;
         Incr = -1;
       }
 
       for (; I != End; I += Incr) {
-        BuildMI(MBB, MBBI, DL, get(Opc), TRI->getSubReg(DstReg, SubRegIdx + I))
-            .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
-                    getKillRegState(KillSrc));
+        auto MIB = BuildMI(MBB, MBBI, DL, get(Opc),
+                           TRI->getSubReg(DstReg, SubRegIdx + I));
+        if (UseVMV_V_I)
+          MIB = MIB.add(DefMBBI->getOperand(1));
+        else
+          MIB = MIB.addReg(TRI->getSubReg(SrcReg, SubRegIdx + I),
+                           getKillRegState(KillSrc));
+        if (UseVMV_V_V) {
+          MIB.add(DefMBBI->getOperand(DefOpNum - 4)); // AVL
+          MIB.add(DefMBBI->getOperand(DefOpNum - 3)); // SEW
+          MIB.add(DefMBBI->getOperand(DefOpNum - 2)); // implicit vl
+          MIB.add(DefMBBI->getOperand(DefOpNum - 1)); // implicit vtype
+        }
       }
     }
   } else {
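A note on the NF * LMulVal change in copyPhysReg above: the standalone C++ sketch below is an illustration only, not LLVM code, and it assumes the RISCVII::VLMUL encoding in which LMUL_1/2/4/8 are 0/1/2/3. It works through the overlap check that decides whether the segment-copy loop has to run backwards.

// Illustration of the tuple-overlap check used by copyPhysReg above.
#include <cassert>
#include <cstdio>

// Mirrors forwardCopyWillClobberTuple() from the hunk above: a forward copy
// clobbers the source when the destination tuple starts inside the source.
static bool forwardCopyWillClobberTuple(unsigned DstEnc, unsigned SrcEnc,
                                        unsigned NumRegs) {
  return ((DstEnc - SrcEnc) & 0x1f) < NumRegs;
}

// Mirrors the NF * LMulVal computation: an integral LMUL of 1/2/4/8 occupies
// 1/2/4/8 architectural registers per segment field (fractional maps to 1).
static unsigned regsPerField(unsigned VLMulEncoding) {
  return 1u << (VLMulEncoding < 4 ? VLMulEncoding : 0);
}

int main() {
  // VRN2M2 copy (NF = 2, LMUL_2 encoded as 1): the tuple spans 2 * 2 = 4 regs.
  unsigned NumRegs = 2 * regsPerField(1);
  assert(NumRegs == 4);
  // Copying the v8..v11 tuple into v10..v13 overlaps, so the loop in
  // copyPhysReg reverses direction (I = NF - 1, Incr = -1).
  assert(forwardCopyWillClobberTuple(/*DstEnc=*/10, /*SrcEnc=*/8, NumRegs));
  // Copying v8..v11 into v16..v19 does not overlap; forward order is fine.
  assert(!forwardCopyWillClobberTuple(/*DstEnc=*/16, /*SrcEnc=*/8, NumRegs));
  std::puts("overlap checks behave as expected");
  return 0;
}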
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -563,7 +563,7 @@
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v8, (a0)
 ; RV32-NEXT:    mv a0, zero
-; RV32-NEXT:    vmv8r.v v16, v8
+; RV32-NEXT:    vmv.v.i v16, 0
 ; RV32-NEXT:    call vector_arg_indirect_stack@plt
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
@@ -615,7 +615,7 @@
 ; RV64-NEXT:    addi a0, a0, 24
 ; RV64-NEXT:    vs8r.v v8, (a0)
 ; RV64-NEXT:    mv a0, zero
-; RV64-NEXT:    vmv8r.v v16, v8
+; RV64-NEXT:    vmv.v.i v16, 0
 ; RV64-NEXT:    call vector_arg_indirect_stack@plt
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll
@@ -798,7 +798,7 @@
 ; LMULMAX8-NEXT:    addi a2, zero, 42
 ; LMULMAX8-NEXT:    addi a3, sp, 128
 ; LMULMAX8-NEXT:    vse32.v v8, (a3)
-; LMULMAX8-NEXT:    vmv8r.v v8, v24
+; LMULMAX8-NEXT:    vmv.v.v v8, v24
 ; LMULMAX8-NEXT:    call ext3@plt
 ; LMULMAX8-NEXT:    addi sp, s0, -384
 ; LMULMAX8-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
@@ -827,8 +827,8 @@
 ; LMULMAX4-NEXT:    addi a3, zero, 42
 ; LMULMAX4-NEXT:    addi a1, sp, 128
 ; LMULMAX4-NEXT:    vse32.v v8, (a1)
-; LMULMAX4-NEXT:    vmv4r.v v8, v28
-; LMULMAX4-NEXT:    vmv4r.v v12, v24
+; LMULMAX4-NEXT:    vmv.v.v v8, v28
+; LMULMAX4-NEXT:    vmv.v.v v12, v24
 ; LMULMAX4-NEXT:    call ext3@plt
 ; LMULMAX4-NEXT:    addi sp, s0, -384
 ; LMULMAX4-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
@@ -865,10 +865,10 @@
 ; LMULMAX2-NEXT:    addi a5, zero, 42
 ; LMULMAX2-NEXT:    addi a1, sp, 128
 ; LMULMAX2-NEXT:    vse32.v v8, (a1)
-; LMULMAX2-NEXT:    vmv2r.v v8, v26
-; LMULMAX2-NEXT:    vmv2r.v v10, v28
-; LMULMAX2-NEXT:    vmv2r.v v12, v30
-; LMULMAX2-NEXT:    vmv2r.v v14, v24
+; LMULMAX2-NEXT:    vmv.v.v v8, v26
+; LMULMAX2-NEXT:    vmv.v.v v10, v28
+; LMULMAX2-NEXT:    vmv.v.v v12, v30
+; LMULMAX2-NEXT:    vmv.v.v v14, v24
 ; LMULMAX2-NEXT:    call ext3@plt
 ; LMULMAX2-NEXT:    addi sp, s0, -384
 ; LMULMAX2-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
@@ -924,14 +924,14 @@
 ; LMULMAX1-NEXT:    addi a0, sp, 128
 ; LMULMAX1-NEXT:    addi a1, sp, 128
 ; LMULMAX1-NEXT:    vse32.v v8, (a1)
-; LMULMAX1-NEXT:    vmv1r.v v8, v25
-; LMULMAX1-NEXT:    vmv1r.v v9, v26
-; LMULMAX1-NEXT:    vmv1r.v v10, v27
-; LMULMAX1-NEXT:    vmv1r.v v11, v28
-; LMULMAX1-NEXT:    vmv1r.v v12, v29
-; LMULMAX1-NEXT:    vmv1r.v v13, v30
-; LMULMAX1-NEXT:    vmv1r.v v14, v31
-; LMULMAX1-NEXT:    vmv1r.v v15, v24
+; LMULMAX1-NEXT:    vmv.v.v v8, v25
+; LMULMAX1-NEXT:    vmv.v.v v9, v26
+; LMULMAX1-NEXT:    vmv.v.v v10, v27
+; LMULMAX1-NEXT:    vmv.v.v v11, v28
+; LMULMAX1-NEXT:    vmv.v.v v12, v29
+; LMULMAX1-NEXT:    vmv.v.v v13, v30
+; LMULMAX1-NEXT:    vmv.v.v v14, v31
+; LMULMAX1-NEXT:    vmv.v.v v15, v24
 ; LMULMAX1-NEXT:    call ext3@plt
 ; LMULMAX1-NEXT:    addi sp, s0, -384
 ; LMULMAX1-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
@@ -1109,7 +1109,7 @@
 ; LMULMAX2-NEXT:    vmv1r.v v10, v8
 ; LMULMAX2-NEXT:    vmv1r.v v11, v8
 ; LMULMAX2-NEXT:    vmv1r.v v12, v8
-; LMULMAX2-NEXT:    vmv2r.v v22, v14
+; LMULMAX2-NEXT:    vmv.v.v v22, v14
 ; LMULMAX2-NEXT:    call split_vector_args@plt
 ; LMULMAX2-NEXT:    addi sp, s0, -256
 ; LMULMAX2-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
@@ -1161,9 +1161,9 @@
 ; LMULMAX1-NEXT:    vmv1r.v v10, v8
 ; LMULMAX1-NEXT:    vmv1r.v v11, v8
 ; LMULMAX1-NEXT:    vmv1r.v v12, v8
-; LMULMAX1-NEXT:    vmv1r.v v21, v13
-; LMULMAX1-NEXT:    vmv1r.v v22, v14
-; LMULMAX1-NEXT:    vmv1r.v v23, v15
+; LMULMAX1-NEXT:    vmv.v.v v21, v13
+; LMULMAX1-NEXT:    vmv.v.v v22, v14
+; LMULMAX1-NEXT:    vmv.v.v v23, v15
 ; LMULMAX1-NEXT:    call split_vector_args@plt
 ; LMULMAX1-NEXT:    addi sp, s0, -256
 ; LMULMAX1-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
@@ -1282,7 +1282,7 @@
 ; LMULMAX8-NEXT:    addi a7, zero, 7
 ; LMULMAX8-NEXT:    sd a0, 128(sp)
 ; LMULMAX8-NEXT:    mv a0, zero
-; LMULMAX8-NEXT:    vmv8r.v v16, v8
+; LMULMAX8-NEXT:    vmv.v.i v16, 0
 ; LMULMAX8-NEXT:    call vector_arg_via_stack@plt
 ; LMULMAX8-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
 ; LMULMAX8-NEXT:    addi sp, sp, 144
@@ -1309,9 +1309,9 @@
 ; LMULMAX4-NEXT:    addi a7, zero, 7
 ; LMULMAX4-NEXT:    vse32.v v8, (a0)
 ; LMULMAX4-NEXT:    mv a0, zero
-; LMULMAX4-NEXT:    vmv4r.v v12, v8
-; LMULMAX4-NEXT:    vmv4r.v v16, v8
-; LMULMAX4-NEXT:    vmv4r.v v20, v8
+; LMULMAX4-NEXT:    vmv.v.i v12, 0
+; LMULMAX4-NEXT:    vmv.v.i v16, 0
+; LMULMAX4-NEXT:    vmv.v.i v20, 0
 ; LMULMAX4-NEXT:    call vector_arg_via_stack@plt
 ; LMULMAX4-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
 ; LMULMAX4-NEXT:    addi sp, sp, 144
@@ -1342,13 +1342,13 @@
 ; LMULMAX2-NEXT:    addi a7, zero, 7
 ; LMULMAX2-NEXT:    vse32.v v8, (a0)
 ; LMULMAX2-NEXT:    mv a0, zero
-; LMULMAX2-NEXT:    vmv2r.v v10, v8
-; LMULMAX2-NEXT:    vmv2r.v v12, v8
-; LMULMAX2-NEXT:    vmv2r.v v14, v8
-; LMULMAX2-NEXT:    vmv2r.v v16, v8
-; LMULMAX2-NEXT:    vmv2r.v v18, v8
-; LMULMAX2-NEXT:    vmv2r.v v20, v8
-; LMULMAX2-NEXT:    vmv2r.v v22, v8
+; LMULMAX2-NEXT:    vmv.v.i v10, 0
+; LMULMAX2-NEXT:    vmv.v.i v12, 0
+; LMULMAX2-NEXT:    vmv.v.i v14, 0
+; LMULMAX2-NEXT:    vmv.v.i v16, 0
+; LMULMAX2-NEXT:    vmv.v.i v18, 0
+; LMULMAX2-NEXT:    vmv.v.i v20, 0
+; LMULMAX2-NEXT:    vmv.v.i v22, 0
 ; LMULMAX2-NEXT:    call vector_arg_via_stack@plt
 ; LMULMAX2-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
 ; LMULMAX2-NEXT:    addi sp, sp, 144
@@ -1387,21 +1387,21 @@
 ; LMULMAX1-NEXT:    addi a7, zero, 7
 ; LMULMAX1-NEXT:    vse32.v v8, (a0)
 ; LMULMAX1-NEXT:    mv a0, zero
-; LMULMAX1-NEXT:    vmv1r.v v9, v8
-; LMULMAX1-NEXT:    vmv1r.v v10, v8
-; LMULMAX1-NEXT:    vmv1r.v v11, v8
-; LMULMAX1-NEXT:    vmv1r.v v12, v8
-; LMULMAX1-NEXT:    vmv1r.v v13, v8
-; LMULMAX1-NEXT:    vmv1r.v v14, v8
-; LMULMAX1-NEXT:    vmv1r.v v15, v8
-; LMULMAX1-NEXT:    vmv1r.v v16, v8
-; LMULMAX1-NEXT:    vmv1r.v v17, v8
-; LMULMAX1-NEXT:    vmv1r.v v18, v8
-; LMULMAX1-NEXT:    vmv1r.v v19, v8
-; LMULMAX1-NEXT:    vmv1r.v v20, v8
-; LMULMAX1-NEXT:    vmv1r.v v21, v8
-; LMULMAX1-NEXT:    vmv1r.v v22, v8
-; LMULMAX1-NEXT:    vmv1r.v v23, v8
+; LMULMAX1-NEXT:    vmv.v.i v9, 0
+; LMULMAX1-NEXT:    vmv.v.i v10, 0
+; LMULMAX1-NEXT:    vmv.v.i v11, 0
+; LMULMAX1-NEXT:    vmv.v.i v12, 0
+; LMULMAX1-NEXT:    vmv.v.i v13, 0
+; LMULMAX1-NEXT:    vmv.v.i v14, 0
+; LMULMAX1-NEXT:    vmv.v.i v15, 0
+; LMULMAX1-NEXT:    vmv.v.i v16, 0
+; LMULMAX1-NEXT:    vmv.v.i v17, 0
+; LMULMAX1-NEXT:    vmv.v.i v18, 0
+; LMULMAX1-NEXT:    vmv.v.i v19, 0
+; LMULMAX1-NEXT:    vmv.v.i v20, 0
+; LMULMAX1-NEXT:    vmv.v.i v21, 0
+; LMULMAX1-NEXT:    vmv.v.i v22, 0
+; LMULMAX1-NEXT:    vmv.v.i v23, 0
 ; LMULMAX1-NEXT:    call vector_arg_via_stack@plt
 ; LMULMAX1-NEXT:    ld ra, 136(sp) # 8-byte Folded Reload
 ; LMULMAX1-NEXT:    addi sp, sp, 144
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir
@@ -0,0 +1,146 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple riscv64 -run-pass=postrapseudos %s -o - | FileCheck %s
+
+...
+---
+name: copy_different_lmul
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x14, $x16
+    ; CHECK-LABEL: name: copy_different_lmul
+    ; CHECK: liveins: $x14, $x16
+    ; CHECK: $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+    ; CHECK: $v12m2 = PseudoVMV2R_V $v28m2
+    $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+    $v12m2 = COPY $v28m2
+...
+---
+name: copy_convert_to_vmv_v_v
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x14, $x16
+    ; CHECK-LABEL: name: copy_convert_to_vmv_v_v
+    ; CHECK: liveins: $x14, $x16
+    ; CHECK: $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+    ; CHECK: $v12m4 = PseudoVMV_V_V_M4 $v28m4, $noreg, 5, implicit $vl, implicit $vtype
+    $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+    $v12m4 = COPY $v28m4
+...
+---
+name: copy_convert_to_vmv_v_i
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x14
+    ; CHECK-LABEL: name: copy_convert_to_vmv_v_i
+    ; CHECK: liveins: $x14
+    ; CHECK: $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v28m4 = PseudoVMV_V_I_M4 0, $noreg, 5, implicit $vl, implicit $vtype
+    ; CHECK: $v12m4 = PseudoVMV_V_I_M4 0, $noreg, 5, implicit $vl, implicit $vtype
+    $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    $v28m4 = PseudoVMV_V_I_M4 0, $noreg, 5, implicit $vl, implicit $vtype
+    $v12m4 = COPY $v28m4
+...
+---
+name: copy_from_whole_load_store
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x14, $x16
+    ; CHECK-LABEL: name: copy_from_whole_load_store
+    ; CHECK: liveins: $x14, $x16
+    ; CHECK: $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v28m4 = VL4RE32_V $x16
+    ; CHECK: $v12m4 = PseudoVMV4R_V $v28m4
+    $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    $v28m4 = VL4RE32_V $x16
+    $v12m4 = COPY $v28m4
+...
+---
+name: copy_with_vleff
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x14, $x16
+    ; CHECK-LABEL: name: copy_with_vleff
+    ; CHECK: liveins: $x14, $x16
+    ; CHECK: $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v28m4 = PseudoVMV_V_I_M4 0, $noreg, 5, implicit $vl, implicit $vtype
+    ; CHECK: $v4m4 = PseudoVLE32FF_V_M4 $x16, $noreg, 5, implicit-def $vl
+    ; CHECK: $v12m4 = PseudoVMV4R_V $v28m4
+    $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    $v28m4 = PseudoVMV_V_I_M4 0, $noreg, 5, implicit $vl, implicit $vtype
+    $v4m4 = PseudoVLE32FF_V_M4 $x16, $noreg, 5, implicit-def $vl
+    $v12m4 = COPY $v28m4
+...
+---
+name: copy_with_vsetvl_x0_x0_1
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x14, $x16, $x17, $x18
+    ; CHECK-LABEL: name: copy_with_vsetvl_x0_x0_1
+    ; CHECK: liveins: $x14, $x16, $x17, $x18
+    ; CHECK: $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+    ; CHECK: $x15 = PseudoVSETVLI $x17, 73, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4, implicit $vl, implicit $vtype
+    ; CHECK: $x0 = PseudoVSETVLI $x0, 82, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v4m4 = PseudoVLE32_V_M4 killed $x18, $noreg, 5, implicit $vl, implicit $vtype
+    ; CHECK: $v12m4 = PseudoVMV4R_V $v28m4
+    $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+    $x15 = PseudoVSETVLI $x17, 73, implicit-def $vl, implicit-def $vtype
+    $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4, implicit $vl, implicit $vtype
+    $x0 = PseudoVSETVLI $x0, 82, implicit-def $vl, implicit-def $vtype
+    $v4m4 = PseudoVLE32_V_M4 killed $x18, $noreg, 5, implicit $vl, implicit $vtype
+    $v12m4 = COPY $v28m4
+...
+---
+name: copy_with_vsetvl_x0_x0_2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x14, $x16, $x17, $x18
+    ; CHECK-LABEL: name: copy_with_vsetvl_x0_x0_2
+    ; CHECK: liveins: $x14, $x16, $x17, $x18
+    ; CHECK: $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+    ; CHECK: $x0 = PseudoVSETVLI $x0, 73, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4, implicit $vl, implicit $vtype
+    ; CHECK: $x0 = PseudoVSETVLI $x0, 82, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v4m4 = PseudoVLE32_V_M4 killed $x18, $noreg, 5, implicit $vl, implicit $vtype
+    ; CHECK: $v12m4 = PseudoVMV_V_V_M4 $v28m4, $noreg, 5, implicit $vl, implicit $vtype
+    $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+    $x0 = PseudoVSETVLI $x0, 73, implicit-def $vl, implicit-def $vtype
+    $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4, implicit $vl, implicit $vtype
+    $x0 = PseudoVSETVLI $x0, 82, implicit-def $vl, implicit-def $vtype
+    $v4m4 = PseudoVLE32_V_M4 killed $x18, $noreg, 5, implicit $vl, implicit $vtype
+    $v12m4 = COPY $v28m4
+...
+---
+name: copy_with_vsetvl_x0_x0_3
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x14, $x16, $x17, $x18
+    ; CHECK-LABEL: name: copy_with_vsetvl_x0_x0_3
+    ; CHECK: liveins: $x14, $x16, $x17, $x18
+    ; CHECK: $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+    ; CHECK: $x0 = PseudoVSETVLI $x0, 73, implicit-def $vl, implicit-def $vtype
+    ; CHECK: $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4, implicit $vl, implicit $vtype
+    ; CHECK: $v12m4 = PseudoVMV4R_V $v28m4
+    $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
+    $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+    $x0 = PseudoVSETVLI $x0, 73, implicit-def $vl, implicit-def $vtype
+    $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4, implicit $vl, implicit $vtype
+    $v12m4 = COPY $v28m4
+...
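For decoding the vsetvli immediates in vmv-copy.mir by hand, the standalone C++ sketch below follows the vtype bit layout from the RVV spec (vlmul in bits 2:0, vsew in bits 5:3, vta in bit 6, vma in bit 7). It is an illustration, not the RISCVVType helpers the patch calls, and it shows why 82 and 73 behave differently under the LMUL and tail-agnostic checks.

// Minimal vtype decoder for the immediates used in the MIR tests above.
#include <cstdio>

struct VType {
  unsigned SEW;      // element width in bits
  unsigned LMulNum;  // integral LMUL only; fractional cases omitted here
  bool TailAgnostic;
  bool MaskAgnostic;
};

static VType decode(unsigned VTypeImm) {
  VType R;
  R.LMulNum = 1u << (VTypeImm & 0x7);    // 0/1/2/3 -> m1/m2/m4/m8
  R.SEW = 8u << ((VTypeImm >> 3) & 0x7); // 0/1/2/3 -> e8/e16/e32/e64
  R.TailAgnostic = (VTypeImm >> 6) & 0x1;
  R.MaskAgnostic = (VTypeImm >> 7) & 0x1;
  return R;
}

int main() {
  // 82 -> e32, m4, ta, mu: matches the PseudoVLE32_V_M4 defs, so an m4 COPY
  // of their result can become vmv.v.v (tail agnostic, same LMUL).
  VType A = decode(82);
  // 73 -> e16, m2, ta, mu: a different LMUL, so an m4 COPY scanned under this
  // vtype (copy_with_vsetvl_x0_x0_3) keeps the whole register move.
  VType B = decode(73);
  std::printf("82: e%u, m%u, %s\n", A.SEW, A.LMulNum,
              A.TailAgnostic ? "ta" : "tu");
  std::printf("73: e%u, m%u, %s\n", B.SEW, B.LMulNum,
              B.TailAgnostic ? "ta" : "tu");
  return 0;
}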