diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -189,7 +189,6 @@ struct VLSEGPseudo { uint16_t NF : 4; uint16_t Masked : 1; - uint16_t IsTU : 1; uint16_t Strided : 1; uint16_t FF : 1; uint16_t Log2SEW : 3; @@ -200,7 +199,6 @@ struct VLXSEGPseudo { uint16_t NF : 4; uint16_t Masked : 1; - uint16_t IsTU : 1; uint16_t Ordered : 1; uint16_t Log2SEW : 3; uint16_t LMUL : 3; @@ -229,7 +227,6 @@ struct VLEPseudo { uint16_t Masked : 1; - uint16_t IsTU : 1; uint16_t Strided : 1; uint16_t FF : 1; uint16_t Log2SEW : 3; @@ -247,7 +244,6 @@ struct VLX_VSXPseudo { uint16_t Masked : 1; - uint16_t IsTU : 1; uint16_t Ordered : 1; uint16_t Log2SEW : 3; uint16_t LMUL : 3; diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -312,12 +312,19 @@ SDValue SEWOp = CurDAG->getTargetConstant(Log2SEW, DL, XLenVT); Operands.push_back(SEWOp); - // Masked load has the tail policy argument. - if (IsMasked && IsLoad) { - // Policy must be a constant. - uint64_t Policy = Node->getConstantOperandVal(CurOp++); - SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); - Operands.push_back(PolicyOp); + // At the IR layer, all the masked load intrinsics have policy operands; + // none of the others do. All have passthru operands. For our pseudos, + // all loads have policy operands. + if (IsLoad) { + if (IsMasked) { + uint64_t Policy = Node->getConstantOperandVal(CurOp++); + SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); + Operands.push_back(PolicyOp); + } else { + uint64_t Policy = RISCVII::MASK_AGNOSTIC; + SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); + Operands.push_back(PolicyOp); + } } Operands.push_back(Chain); // Chain. 
@@ -325,10 +332,6 @@ Operands.push_back(Glue); } -static bool isAllUndef(ArrayRef Values) { - return llvm::all_of(Values, [](SDValue V) { return V->isUndef(); }); -} - void RISCVDAGToDAGISel::selectVLSEG(SDNode *Node, bool IsMasked, bool IsStrided) { SDLoc DL(Node); @@ -342,18 +345,15 @@ SmallVector Regs(Node->op_begin() + CurOp, Node->op_begin() + CurOp + NF); - bool IsTU = IsMasked || !isAllUndef(Regs); - if (IsTU) { - SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL); - Operands.push_back(Merge); - } + SDValue Merge = createTuple(*CurDAG, Regs, NF, LMUL); + Operands.push_back(Merge); CurOp += NF; addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, Operands, /*IsLoad=*/true); const RISCV::VLSEGPseudo *P = - RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, + RISCV::getVLSEGPseudo(NF, IsMasked, IsStrided, /*FF*/ false, Log2SEW, static_cast(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); @@ -385,11 +385,8 @@ SmallVector Regs(Node->op_begin() + CurOp, Node->op_begin() + CurOp + NF); - bool IsTU = IsMasked || !isAllUndef(Regs); - if (IsTU) { - SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); - Operands.push_back(MaskedOff); - } + SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); + Operands.push_back(MaskedOff); CurOp += NF; addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, @@ -397,7 +394,7 @@ /*IsLoad=*/true); const RISCV::VLSEGPseudo *P = - RISCV::getVLSEGPseudo(NF, IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, + RISCV::getVLSEGPseudo(NF, IsMasked, /*Strided*/ false, /*FF*/ true, Log2SEW, static_cast(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, XLenVT, MVT::Other, Operands); @@ -430,11 +427,8 @@ SmallVector Regs(Node->op_begin() + CurOp, Node->op_begin() + CurOp + NF); - bool IsTU = IsMasked || !isAllUndef(Regs); - if (IsTU) { - SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); - Operands.push_back(MaskedOff); - } + SDValue MaskedOff = createTuple(*CurDAG, Regs, NF, LMUL); + Operands.push_back(MaskedOff); CurOp += NF; MVT IndexVT; @@ -452,7 +446,7 @@ "values when XLEN=32"); } const RISCV::VLXSEGPseudo *P = RISCV::getVLXSEGPseudo( - NF, IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast(LMUL), + NF, IsMasked, IsOrdered, IndexLog2EEW, static_cast(LMUL), static_cast(IndexLMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, MVT::Untyped, MVT::Other, Operands); @@ -1691,14 +1685,8 @@ unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); unsigned CurOp = 2; - // Masked intrinsic only have TU version pseduo instructions. 
- bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); SmallVector Operands; - if (IsTU) - Operands.push_back(Node->getOperand(CurOp++)); - else - // Skip the undef passthru operand for nomask TA version pseudo - CurOp++; + Operands.push_back(Node->getOperand(CurOp++)); MVT IndexVT; addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, @@ -1716,7 +1704,7 @@ "values when XLEN=32"); } const RISCV::VLX_VSXPseudo *P = RISCV::getVLXPseudo( - IsMasked, IsTU, IsOrdered, IndexLog2EEW, static_cast(LMUL), + IsMasked, IsOrdered, IndexLog2EEW, static_cast(LMUL), static_cast(IndexLMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); @@ -1740,25 +1728,30 @@ MVT VT = Node->getSimpleValueType(0); unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); - unsigned CurOp = 2; - // The riscv_vlm intrinsic are always tail agnostic and no passthru operand. + // The riscv_vlm intrinsic is always tail agnostic and has no passthru + // operand at the IR level. In pseudos, it has both policy and + // passthru operands. The passthru operand is needed to track the + // "tail undefined" state, and the policy is there just for + // consistency - it will always be "don't care" for the + // unmasked form. bool HasPassthruOperand = IntNo != Intrinsic::riscv_vlm; - // Masked intrinsic only have TU version pseduo instructions. - bool IsTU = HasPassthruOperand && - (IsMasked || !Node->getOperand(CurOp).isUndef()); + unsigned CurOp = 2; SmallVector Operands; - if (IsTU) + if (HasPassthruOperand) Operands.push_back(Node->getOperand(CurOp++)); - else if (HasPassthruOperand) - // Skip the undef passthru operand for nomask TA version pseudo - CurOp++; - + else { + // We eagerly lower to implicit_def (instead of undef), as we + // otherwise fail to select nodes such as: nxv1i1 = undef + SDNode *Passthru = + CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); + Operands.push_back(SDValue(Passthru, 0)); + } addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, IsStrided, Operands, /*IsLoad=*/true); RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); const RISCV::VLEPseudo *P = - RISCV::getVLEPseudo(IsMasked, IsTU, IsStrided, /*FF*/ false, Log2SEW, + RISCV::getVLEPseudo(IsMasked, IsStrided, /*FF*/ false, Log2SEW, static_cast(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); @@ -1777,22 +1770,15 @@ unsigned Log2SEW = Log2_32(VT.getScalarSizeInBits()); unsigned CurOp = 2; - // Masked intrinsic only have TU version pseduo instructions. 
- bool IsTU = IsMasked || !Node->getOperand(CurOp).isUndef(); SmallVector Operands; - if (IsTU) - Operands.push_back(Node->getOperand(CurOp++)); - else - // Skip the undef passthru operand for nomask TA version pseudo - CurOp++; - + Operands.push_back(Node->getOperand(CurOp++)); addVectorLoadStoreOperands(Node, Log2SEW, DL, CurOp, IsMasked, /*IsStridedOrIndexed*/ false, Operands, /*IsLoad=*/true); RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); const RISCV::VLEPseudo *P = - RISCV::getVLEPseudo(IsMasked, IsTU, /*Strided*/ false, /*FF*/ true, + RISCV::getVLEPseudo(IsMasked, /*Strided*/ false, /*FF*/ true, Log2SEW, static_cast(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode( P->Pseudo, DL, Node->getVTList(), Operands); @@ -1916,7 +1902,7 @@ "values when XLEN=32"); } const RISCV::VLX_VSXPseudo *P = RISCV::getVSXPseudo( - IsMasked, /*TU*/ false, IsOrdered, IndexLog2EEW, + IsMasked, IsOrdered, IndexLog2EEW, static_cast(LMUL), static_cast(IndexLMUL)); MachineSDNode *Store = CurDAG->getMachineNode(P->Pseudo, DL, Node->getVTList(), Operands); @@ -2113,14 +2099,17 @@ if (IsStrided && !Subtarget->hasOptimizedZeroStrideLoad()) break; - SmallVector Operands = {Ld->getBasePtr()}; + SmallVector Operands = + {CurDAG->getUNDEF(VT), Ld->getBasePtr()}; if (IsStrided) Operands.push_back(CurDAG->getRegister(RISCV::X0, XLenVT)); - Operands.append({VL, SEW, Ld->getChain()}); + uint64_t Policy = RISCVII::MASK_AGNOSTIC | RISCVII::TAIL_AGNOSTIC; + SDValue PolicyOp = CurDAG->getTargetConstant(Policy, DL, XLenVT); + Operands.append({VL, SEW, PolicyOp, Ld->getChain()}); RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT); const RISCV::VLEPseudo *P = RISCV::getVLEPseudo( - /*IsMasked*/ false, /*IsTU*/ false, IsStrided, /*FF*/ false, + /*IsMasked*/ false, IsStrided, /*FF*/ false, Log2SEW, static_cast(LMUL)); MachineSDNode *Load = CurDAG->getMachineNode(P->Pseudo, DL, {VT, MVT::Other}, Operands); diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -160,11 +160,23 @@ // lanes are undefined. return true; - // If the tied operand is an IMPLICIT_DEF, the pass through lanes - // are undefined. + // If the tied operand is an IMPLICIT_DEF (or a REG_SEQUENCE whose operands + // are solely IMPLICIT_DEFS), the pass through lanes are undefined. const MachineOperand &UseMO = MI.getOperand(UseOpIdx); - MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg()); - return UseMI && UseMI->isImplicitDef(); + if (MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg())) { + if (UseMI->isImplicitDef()) + return true; + + if (UseMI->isRegSequence()) { + for (unsigned i = 1, e = UseMI->getNumOperands(); i < e; i += 2) { + MachineInstr *SourceMI = MRI.getVRegDef(UseMI->getOperand(i).getReg()); + if (!SourceMI || !SourceMI->isImplicitDef()) + return false; + } + return true; + } + } + return false; } /// Which subfields of VL or VTYPE have values we need to preserve? 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -556,9 +556,8 @@ let PrimaryKeyName = "getMaskedPseudoInfo"; } -class RISCVVLE S, bits<3> L> { +class RISCVVLE S, bits<3> L> { bits<1> Masked = M; - bits<1> IsTU = TU; bits<1> Strided = Str; bits<1> FF = F; bits<3> Log2SEW = S; @@ -579,8 +578,8 @@ def RISCVVLETable : GenericTable { let FilterClass = "RISCVVLE"; let CppTypeName = "VLEPseudo"; - let Fields = ["Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"]; - let PrimaryKey = ["Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL"]; + let Fields = ["Masked", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"]; + let PrimaryKey = ["Masked", "Strided", "FF", "Log2SEW", "LMUL"]; let PrimaryKeyName = "getVLEPseudo"; } @@ -600,9 +599,8 @@ let PrimaryKeyName = "getVSEPseudo"; } -class RISCVVLX_VSX S, bits<3> L, bits<3> IL> { +class RISCVVLX_VSX S, bits<3> L, bits<3> IL> { bits<1> Masked = M; - bits<1> IsTU = TU; bits<1> Ordered = O; bits<3> Log2SEW = S; bits<3> LMUL = L; @@ -610,15 +608,15 @@ Pseudo Pseudo = !cast(NAME); } -class RISCVVLX S, bits<3> L, bits<3> IL> : - RISCVVLX_VSX; +class RISCVVLX S, bits<3> L, bits<3> IL> : + RISCVVLX_VSX; class RISCVVSX S, bits<3> L, bits<3> IL> : - RISCVVLX_VSX; + RISCVVLX_VSX; class RISCVVLX_VSXTable : GenericTable { let CppTypeName = "VLX_VSXPseudo"; - let Fields = ["Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"]; - let PrimaryKey = ["Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"]; + let Fields = ["Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"]; + let PrimaryKey = ["Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"]; } def RISCVVLXTable : RISCVVLX_VSXTable { @@ -631,10 +629,9 @@ let PrimaryKeyName = "getVSXPseudo"; } -class RISCVVLSEG N, bit M, bit TU, bit Str, bit F, bits<3> S, bits<3> L> { +class RISCVVLSEG N, bit M, bit Str, bit F, bits<3> S, bits<3> L> { bits<4> NF = N; bits<1> Masked = M; - bits<1> IsTU = TU; bits<1> Strided = Str; bits<1> FF = F; bits<3> Log2SEW = S; @@ -645,15 +642,14 @@ def RISCVVLSEGTable : GenericTable { let FilterClass = "RISCVVLSEG"; let CppTypeName = "VLSEGPseudo"; - let Fields = ["NF", "Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"]; - let PrimaryKey = ["NF", "Masked", "IsTU", "Strided", "FF", "Log2SEW", "LMUL"]; + let Fields = ["NF", "Masked", "Strided", "FF", "Log2SEW", "LMUL", "Pseudo"]; + let PrimaryKey = ["NF", "Masked", "Strided", "FF", "Log2SEW", "LMUL"]; let PrimaryKeyName = "getVLSEGPseudo"; } -class RISCVVLXSEG N, bit M, bit TU, bit O, bits<3> S, bits<3> L, bits<3> IL> { +class RISCVVLXSEG N, bit M, bit O, bits<3> S, bits<3> L, bits<3> IL> { bits<4> NF = N; bits<1> Masked = M; - bits<1> IsTU = TU; bits<1> Ordered = O; bits<3> Log2SEW = S; bits<3> LMUL = L; @@ -664,8 +660,8 @@ def RISCVVLXSEGTable : GenericTable { let FilterClass = "RISCVVLXSEG"; let CppTypeName = "VLXSEGPseudo"; - let Fields = ["NF", "Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"]; - let PrimaryKey = ["NF", "Masked", "IsTU", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"]; + let Fields = ["NF", "Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL", "Pseudo"]; + let PrimaryKey = ["NF", "Masked", "Ordered", "Log2SEW", "LMUL", "IndexLMUL"]; let PrimaryKeyName = "getVLXSEGPseudo"; } @@ -747,26 +743,16 @@ class VPseudoUSLoadNoMask : Pseudo<(outs RetClass:$rd), - (ins GPRMem:$rs1, AVL:$vl, 
ixlenimm:$sew),[]>, - RISCVVPseudo, - RISCVVLE { - let mayLoad = 1; - let mayStore = 0; - let hasSideEffects = 0; - let HasVLOp = 1; - let HasSEWOp = 1; -} - -class VPseudoUSLoadNoMaskTU : - Pseudo<(outs RetClass:$rd), - (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>, + (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew, + ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLE { + RISCVVLE { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; + let HasVecPolicyOp = 1; let Constraints = "$rd = $dest"; } @@ -776,7 +762,7 @@ GPRMem:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLE { + RISCVVLE { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -789,26 +775,16 @@ class VPseudoUSLoadFFNoMask : Pseudo<(outs RetClass:$rd, GPR:$vl), - (ins GPRMem:$rs1, AVL:$avl, ixlenimm:$sew),[]>, + (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, + ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLE { - let mayLoad = 1; - let mayStore = 0; - let hasSideEffects = 0; - let HasVLOp = 1; - let HasSEWOp = 1; -} - -class VPseudoUSLoadFFNoMaskTU : - Pseudo<(outs RetClass:$rd, GPR:$vl), - (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, ixlenimm:$sew),[]>, - RISCVVPseudo, - RISCVVLE { + RISCVVLE { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; + let HasVecPolicyOp = 1; let Constraints = "$rd = $dest"; } @@ -818,7 +794,7 @@ GPRMem:$rs1, VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLE { + RISCVVLE { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -831,26 +807,16 @@ class VPseudoSLoadNoMask: Pseudo<(outs RetClass:$rd), - (ins GPRMem:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>, + (ins RetClass:$dest, GPRMem:$rs1, GPR:$rs2, AVL:$vl, + ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLE { - let mayLoad = 1; - let mayStore = 0; - let hasSideEffects = 0; - let HasVLOp = 1; - let HasSEWOp = 1; -} - -class VPseudoSLoadNoMaskTU: - Pseudo<(outs RetClass:$rd), - (ins RetClass:$dest, GPRMem:$rs1, GPR:$rs2, AVL:$vl, ixlenimm:$sew),[]>, - RISCVVPseudo, - RISCVVLE { + RISCVVLE { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; + let HasVecPolicyOp = 1; let Constraints = "$rd = $dest"; } @@ -860,7 +826,7 @@ GPRMem:$rs1, GPR:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLE { + RISCVVLE { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -873,31 +839,17 @@ class VPseudoILoadNoMask LMUL, bit Ordered, bit EarlyClobber>: - Pseudo<(outs RetClass:$rd), - (ins GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, - ixlenimm:$sew),[]>, - RISCVVPseudo, - RISCVVLX { - let mayLoad = 1; - let mayStore = 0; - let hasSideEffects = 0; - let HasVLOp = 1; - let HasSEWOp = 1; - let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd", ""); -} - -class VPseudoILoadNoMaskTU LMUL, - bit Ordered, bit EarlyClobber>: Pseudo<(outs RetClass:$rd), (ins RetClass:$dest, GPRMem:$rs1, IdxClass:$rs2, AVL:$vl, - ixlenimm:$sew),[]>, + ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLX { + RISCVVLX { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; + let HasVecPolicyOp = 1; let Constraints = !if(!eq(EarlyClobber, 1), "@earlyclobber $rd, $rd = $dest", "$rd = $dest"); } @@ -908,7 +860,7 @@ GPRMem:$rs1, IdxClass:$rs2, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLX { + RISCVVLX { 
let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -1428,26 +1380,16 @@ class VPseudoUSSegLoadNoMask NF>: Pseudo<(outs RetClass:$rd), - (ins GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>, - RISCVVPseudo, - RISCVVLSEG { - let mayLoad = 1; - let mayStore = 0; - let hasSideEffects = 0; - let HasVLOp = 1; - let HasSEWOp = 1; -} - -class VPseudoUSSegLoadNoMaskTU NF>: - Pseudo<(outs RetClass:$rd), - (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, ixlenimm:$sew),[]>, + (ins RetClass:$dest, GPRMem:$rs1, AVL:$vl, + ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLSEG { + RISCVVLSEG { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; + let HasVecPolicyOp = 1; let Constraints = "$rd = $dest"; } @@ -1456,7 +1398,7 @@ (ins GetVRegNoV0.R:$merge, GPRMem:$rs1, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLSEG { + RISCVVLSEG { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -1469,26 +1411,16 @@ class VPseudoUSSegLoadFFNoMask NF>: Pseudo<(outs RetClass:$rd, GPR:$vl), - (ins GPRMem:$rs1, AVL:$avl, ixlenimm:$sew),[]>, + (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, + ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLSEG { - let mayLoad = 1; - let mayStore = 0; - let hasSideEffects = 0; - let HasVLOp = 1; - let HasSEWOp = 1; -} - -class VPseudoUSSegLoadFFNoMaskTU NF>: - Pseudo<(outs RetClass:$rd, GPR:$vl), - (ins RetClass:$dest, GPRMem:$rs1, AVL:$avl, ixlenimm:$sew),[]>, - RISCVVPseudo, - RISCVVLSEG { + RISCVVLSEG { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; + let HasVecPolicyOp = 1; let Constraints = "$rd = $dest"; } @@ -1497,7 +1429,7 @@ (ins GetVRegNoV0.R:$merge, GPRMem:$rs1, VMaskOp:$vm, AVL:$avl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLSEG { + RISCVVLSEG { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -1510,26 +1442,16 @@ class VPseudoSSegLoadNoMask NF>: Pseudo<(outs RetClass:$rd), - (ins GPRMem:$rs1, GPR:$offset, AVL:$vl, ixlenimm:$sew),[]>, + (ins RetClass:$merge, GPRMem:$rs1, GPR:$offset, AVL:$vl, + ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLSEG { - let mayLoad = 1; - let mayStore = 0; - let hasSideEffects = 0; - let HasVLOp = 1; - let HasSEWOp = 1; -} - -class VPseudoSSegLoadNoMaskTU NF>: - Pseudo<(outs RetClass:$rd), - (ins RetClass:$merge, GPRMem:$rs1, GPR:$offset, AVL:$vl, ixlenimm:$sew),[]>, - RISCVVPseudo, - RISCVVLSEG { + RISCVVLSEG { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; let HasVLOp = 1; let HasSEWOp = 1; + let HasVecPolicyOp = 1; let Constraints = "$rd = $merge"; } @@ -1539,7 +1461,7 @@ GPR:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLSEG { + RISCVVLSEG { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -1553,25 +1475,10 @@ class VPseudoISegLoadNoMask LMUL, bits<4> NF, bit Ordered>: Pseudo<(outs RetClass:$rd), - (ins GPRMem:$rs1, IdxClass:$offset, AVL:$vl, ixlenimm:$sew),[]>, - RISCVVPseudo, - RISCVVLXSEG { - let mayLoad = 1; - let mayStore = 0; - let hasSideEffects = 0; - // For vector indexed segment loads, the destination vector register groups - // cannot overlap the source vector register group - let Constraints = "@earlyclobber $rd"; - let HasVLOp = 1; - let HasSEWOp = 1; -} - -class VPseudoISegLoadNoMaskTU LMUL, - bits<4> NF, bit Ordered>: - Pseudo<(outs RetClass:$rd), - (ins RetClass:$merge, GPRMem:$rs1, IdxClass:$offset, AVL:$vl, ixlenimm:$sew),[]>, + (ins RetClass:$merge, GPRMem:$rs1, 
IdxClass:$offset, AVL:$vl, + ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLXSEG { + RISCVVLXSEG { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -1580,6 +1487,7 @@ let Constraints = "@earlyclobber $rd, $rd = $merge"; let HasVLOp = 1; let HasSEWOp = 1; + let HasVecPolicyOp = 1; } class VPseudoISegLoadMask LMUL, @@ -1589,7 +1497,7 @@ IdxClass:$offset, VMaskOp:$vm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy),[]>, RISCVVPseudo, - RISCVVLXSEG { + RISCVVLXSEG { let mayLoad = 1; let mayStore = 0; let hasSideEffects = 0; @@ -1689,12 +1597,10 @@ def "E" # eew # "_V_" # LInfo : VPseudoUSLoadNoMask, VLESched; - def "E" # eew # "_V_" # LInfo # "_TU": - VPseudoUSLoadNoMaskTU, - VLESched; def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSLoadMask, - RISCVMaskedPseudo, + RISCVMaskedPseudo, VLESched; } } @@ -1710,12 +1616,10 @@ def "E" # eew # "FF_V_" # LInfo: VPseudoUSLoadFFNoMask, VLFSched; - def "E" # eew # "FF_V_" # LInfo # "_TU": - VPseudoUSLoadFFNoMaskTU, - VLFSched; def "E" # eew # "FF_V_" # LInfo # "_MASK": VPseudoUSLoadFFMask, - RISCVMaskedPseudo, + RISCVMaskedPseudo, VLFSched; } } @@ -1741,11 +1645,10 @@ let VLMul = lmul.value in { def "E" # eew # "_V_" # LInfo : VPseudoSLoadNoMask, VLSSched; - def "E" # eew # "_V_" # LInfo # "_TU": VPseudoSLoadNoMaskTU, - VLSSched; def "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSLoadMask, - RISCVMaskedPseudo, + RISCVMaskedPseudo, VLSSched; } } @@ -1772,12 +1675,10 @@ def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo : VPseudoILoadNoMask, VLXSched; - def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_TU": - VPseudoILoadNoMaskTU, - VLXSched; def "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" : VPseudoILoadMask, - RISCVMaskedPseudo, + RISCVMaskedPseudo, VLXSched; } } @@ -3738,8 +3639,6 @@ defvar vreg = SegRegClass.RC; def nf # "E" # eew # "_V_" # LInfo : VPseudoUSSegLoadNoMask, VLSEGSched; - def nf # "E" # eew # "_V_" # LInfo # "_TU" : - VPseudoUSSegLoadNoMaskTU, VLSEGSched; def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoUSSegLoadMask, VLSEGSched; } @@ -3757,8 +3656,6 @@ defvar vreg = SegRegClass.RC; def nf # "E" # eew # "FF_V_" # LInfo : VPseudoUSSegLoadFFNoMask, VLSEGFFSched; - def nf # "E" # eew # "FF_V_" # LInfo # "_TU" : - VPseudoUSSegLoadFFNoMaskTU, VLSEGFFSched; def nf # "E" # eew # "FF_V_" # LInfo # "_MASK" : VPseudoUSSegLoadFFMask, VLSEGFFSched; } @@ -3776,8 +3673,6 @@ defvar vreg = SegRegClass.RC; def nf # "E" # eew # "_V_" # LInfo : VPseudoSSegLoadNoMask, VLSSEGSched; - def nf # "E" # eew # "_V_" # LInfo # "_TU" : VPseudoSSegLoadNoMaskTU, - VLSSEGSched; def nf # "E" # eew # "_V_" # LInfo # "_MASK" : VPseudoSSegLoadMask, VLSSEGSched; } @@ -3807,10 +3702,6 @@ VPseudoISegLoadNoMask, VLXSEGSched; - def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_TU" : - VPseudoISegLoadNoMaskTU, - VLXSEGSched; def nf # "EI" # idxEEW # "_V_" # IdxLInfo # "_" # DataLInfo # "_MASK" : VPseudoISegLoadMask, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -34,7 +34,8 @@ defvar store_instr = !cast("PseudoVSE"#sew#"_V_"#vlmul.MX); // Load def : Pat<(type (load GPR:$rs1)), - (load_instr GPR:$rs1, avl, log2sew)>; + (load_instr (type (IMPLICIT_DEF)), GPR:$rs1, avl, + log2sew, TU_MU)>; // Store def : Pat<(store type:$rs2, GPR:$rs1), (store_instr reg_class:$rs2, GPR:$rs1, avl, log2sew)>; @@ -63,7 +64,8 @@ defvar store_instr = 
!cast("PseudoVSM_V_"#m.BX); // Load def : Pat<(m.Mask (load GPR:$rs1)), - (load_instr GPR:$rs1, m.AVL, m.Log2SEW)>; + (load_instr (m.Mask (IMPLICIT_DEF)), GPR:$rs1, m.AVL, + m.Log2SEW, TA_MA)>; // Store def : Pat<(store m.Mask:$rs2, GPR:$rs1), (store_instr VR:$rs2, GPR:$rs1, m.AVL, m.Log2SEW)>; diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.mir b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.mir deleted file mode 100644 --- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.mir +++ /dev/null @@ -1,224 +0,0 @@ -# REQUIRES: asserts -# RUN: llc %s -run-pass=greedy -debug -riscv-enable-subreg-liveness -o - \ -# RUN: -verify-machineinstrs 2>&1 \ -# RUN: | FileCheck %s ---- | - target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" - target triple = "riscv64" - - @var_47 = dso_local global [2 x i16] [i16 -32732, i16 19439], align 2 - @__const._Z3foov.var_49 = private unnamed_addr constant [2 x i16] [i16 157, i16 24062], align 2 - @__const._Z3foov.var_48 = private unnamed_addr constant [2 x i8] c"\AEN", align 1 - @__const._Z3foov.var_46 = private unnamed_addr constant [2 x i16] [i16 729, i16 -32215], align 2 - @__const._Z3foov.var_45 = private unnamed_addr constant [2 x i16] [i16 -27462, i16 -1435], align 2 - @__const._Z3foov.var_44 = private unnamed_addr constant [2 x i16] [i16 22611, i16 -18435], align 2 - @__const._Z3foov.var_40 = private unnamed_addr constant [2 x i16] [i16 -19932, i16 -26252], align 2 - - define void @_Z3foov() #0 { - entry: - %0 = tail call @llvm.riscv.vle.nxv8i16.i64( undef, ptr nonnull @__const._Z3foov.var_49, i64 2) - %1 = tail call @llvm.riscv.vle.nxv8i8.i64( undef, ptr nonnull @__const._Z3foov.var_48, i64 2) - %2 = tail call @llvm.riscv.vle.nxv8i16.i64( undef, ptr nonnull @__const._Z3foov.var_46, i64 2) - %3 = tail call @llvm.riscv.vle.nxv8i16.i64( undef, ptr nonnull @__const._Z3foov.var_45, i64 2) - tail call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() - %4 = tail call @llvm.riscv.vle.nxv8i16.i64( undef, ptr nonnull @__const._Z3foov.var_44, i64 2) - %5 = tail call i64 @llvm.riscv.vsetvli.i64(i64 2, i64 1, i64 1) - %6 = tail call @llvm.riscv.vle.nxv8i16.i64( undef, ptr nonnull @__const._Z3foov.var_40, i64 2) - %7 = tail call i64 @llvm.riscv.vsetvli.i64(i64 2, i64 1, i64 1) - %8 = tail call @llvm.riscv.vmsbc.nxv8i16.i16.i64( %6, i16 -15456, i64 2) - %9 = tail call i64 @llvm.riscv.vsetvli.i64(i64 2, i64 1, i64 1) - %10 = tail call @llvm.riscv.vsext.mask.nxv8i16.nxv8i8.i64( %0, %1, %8, i64 2, i64 0) - tail call void @llvm.riscv.vsseg4.nxv8i16.i64( %10, %2, %3, %4, ptr nonnull @var_47, i64 2) - ret void - } - - ; Function Attrs: nounwind readonly - declare @llvm.riscv.vle.nxv8i16.i64(, ptr nocapture, i64) #1 - - ; Function Attrs: nounwind readonly - declare @llvm.riscv.vle.nxv8i8.i64(, ptr nocapture, i64) #1 - - ; Function Attrs: nounwind - declare i64 @llvm.riscv.vsetvli.i64(i64, i64 immarg, i64 immarg) #2 - - ; Function Attrs: nounwind readnone - declare @llvm.riscv.vmsbc.nxv8i16.i16.i64(, i16, i64) #3 - - ; Function Attrs: nounwind readnone - declare @llvm.riscv.vsext.mask.nxv8i16.nxv8i8.i64(, , , i64, i64 immarg) #3 - - ; Function Attrs: nounwind writeonly - declare void @llvm.riscv.vsseg4.nxv8i16.i64(, , , , ptr nocapture, i64) #4 - - attributes #0 = { "target-features"="+v,+m,+zbb" } - attributes #1 = { 
nounwind readonly "target-features"="+v,+m,+zbb" } - attributes #2 = { nounwind "target-features"="+v,+m,+zbb" } - attributes #3 = { nounwind readnone "target-features"="+v,+m,+zbb" } - attributes #4 = { nounwind writeonly "target-features"="+v,+m,+zbb" } - -... ---- -name: _Z3foov -alignment: 4 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false -tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: gpr, preferred-register: '' } - - { id: 1, class: gpr, preferred-register: '' } - - { id: 2, class: vrm2nov0, preferred-register: '' } - - { id: 3, class: gpr, preferred-register: '' } - - { id: 4, class: gpr, preferred-register: '' } - - { id: 5, class: vr, preferred-register: '' } - - { id: 6, class: gpr, preferred-register: '' } - - { id: 7, class: gpr, preferred-register: '' } - - { id: 8, class: vrm2, preferred-register: '' } - - { id: 9, class: gpr, preferred-register: '' } - - { id: 10, class: gpr, preferred-register: '' } - - { id: 11, class: vrm2, preferred-register: '' } - - { id: 12, class: gpr, preferred-register: '' } - - { id: 13, class: gpr, preferred-register: '' } - - { id: 14, class: vrm2, preferred-register: '' } - - { id: 15, class: gpr, preferred-register: '' } - - { id: 16, class: gpr, preferred-register: '' } - - { id: 17, class: gpr, preferred-register: '' } - - { id: 18, class: vrm2, preferred-register: '' } - - { id: 19, class: gpr, preferred-register: '' } - - { id: 20, class: gpr, preferred-register: '' } - - { id: 21, class: gpr, preferred-register: '' } - - { id: 22, class: vr, preferred-register: '' } - - { id: 23, class: gpr, preferred-register: '' } - - { id: 24, class: vrm2nov0, preferred-register: '' } - - { id: 25, class: vrn4m2nov0, preferred-register: '' } - - { id: 26, class: gpr, preferred-register: '' } - - { id: 27, class: gpr, preferred-register: '' } -liveins: [] -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: - varArgsFrameIndex: 0 - varArgsSaveSize: 0 -body: | - bb.0.entry: - ; CHECK: 0B bb.0.entry: - ; CHECK-NEXT: 16B %0:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_49 - ; CHECK-NEXT: 32B %1:gpr = ADDI %0:gpr, target-flags(riscv-lo) @__const._Z3foov.var_49 - ; CHECK-NEXT: 48B dead $x0 = PseudoVSETIVLI 2, 73, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: 64B undef %25.sub_vrm2_0:vrn4m2nov0 = PseudoVLE16_V_M2 %1:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: 80B %3:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_48 - ; CHECK-NEXT: 96B %4:gpr = ADDI %3:gpr, target-flags(riscv-lo) @__const._Z3foov.var_48 - ; CHECK-NEXT: 112B %5:vr = PseudoVLE8_V_M1 %4:gpr, 2, 3, implicit $vl, implicit $vtype - ; CHECK-NEXT: 128B %6:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_46 - ; CHECK-NEXT: 144B %7:gpr = ADDI %6:gpr, target-flags(riscv-lo) 
@__const._Z3foov.var_46 - ; CHECK-NEXT: 160B %25.sub_vrm2_1:vrn4m2nov0 = PseudoVLE16_V_M2 %7:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: 176B %9:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_45 - ; CHECK-NEXT: 192B %10:gpr = ADDI %9:gpr, target-flags(riscv-lo) @__const._Z3foov.var_45 - ; CHECK-NEXT: 208B %25.sub_vrm2_2:vrn4m2nov0 = PseudoVLE16_V_M2 %10:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: 224B INLINEASM &"" [sideeffect] [attdialect], $0:[clobber], implicit-def dead early-clobber $v0, $1:[clobber], implicit-def dead early-clobber $v1, $2:[clobber], implicit-def dead early-clobber $v2, $3:[clobber], implicit-def dead early-clobber $v3, $4:[clobber], implicit-def dead early-clobber $v4, $5:[clobber], implicit-def dead early-clobber $v5, $6:[clobber], implicit-def dead early-clobber $v6, $7:[clobber], implicit-def dead early-clobber $v7, $8:[clobber], implicit-def dead early-clobber $v8, $9:[clobber], implicit-def dead early-clobber $v9, $10:[clobber], implicit-def dead early-clobber $v10, $11:[clobber], implicit-def dead early-clobber $v11, $12:[clobber], implicit-def dead early-clobber $v12, $13:[clobber], implicit-def dead early-clobber $v13, $14:[clobber], implicit-def dead early-clobber $v14, $15:[clobber], implicit-def dead early-clobber $v15, $16:[clobber], implicit-def dead early-clobber $v16, $17:[clobber], implicit-def dead early-clobber $v17, $18:[clobber], implicit-def dead early-clobber $v18, $19:[clobber], implicit-def dead early-clobber $v19, $20:[clobber], implicit-def dead early-clobber $v20, $21:[clobber], implicit-def dead early-clobber $v21, $22:[clobber], implicit-def dead early-clobber $v22, $23:[clobber], implicit-def dead early-clobber $v23, $24:[clobber], implicit-def dead early-clobber $v24, $25:[clobber], implicit-def dead early-clobber $v25, $26:[clobber], implicit-def dead early-clobber $v26, $27:[clobber], implicit-def dead early-clobber $v27, $28:[clobber], implicit-def dead early-clobber $v28, $29:[clobber], implicit-def dead early-clobber $v29, $30:[clobber], implicit-def dead early-clobber $v30, $31:[clobber], implicit-def dead early-clobber $v31 - ; CHECK-NEXT: 240B %12:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_44 - ; CHECK-NEXT: 256B %13:gpr = ADDI %12:gpr, target-flags(riscv-lo) @__const._Z3foov.var_44 - ; CHECK-NEXT: 272B dead $x0 = PseudoVSETIVLI 2, 73, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: 288B %25.sub_vrm2_3:vrn4m2nov0 = PseudoVLE16_V_M2 %13:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: 304B $x0 = PseudoVSETIVLI 2, 73, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: 320B %16:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_40 - ; CHECK-NEXT: 336B %17:gpr = ADDI %16:gpr, target-flags(riscv-lo) @__const._Z3foov.var_40 - ; CHECK-NEXT: 352B %18:vrm2 = PseudoVLE16_V_M2 %17:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: 368B $x0 = PseudoVSETIVLI 2, 73, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: 384B %20:gpr = LUI 1048572 - ; CHECK-NEXT: 400B %21:gpr = ADDIW %20:gpr, 928 - ; CHECK-NEXT: 416B early-clobber %22:vr = PseudoVMSBC_VX_M2 %18:vrm2, %21:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: 432B $x0 = PseudoVSETIVLI 2, 9, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: 448B $v0 = COPY %22:vr - ; CHECK-NEXT: 464B early-clobber %25.sub_vrm2_0:vrn4m2nov0 = PseudoVSEXT_VF2_M2_MASK %25.sub_vrm2_0:vrn4m2nov0(tied-def 0), %5:vr, killed $v0, 2, 4, 0, implicit $vl, implicit $vtype - ; CHECK-NEXT: 480B %26:gpr = LUI target-flags(riscv-hi) 
@var_47 - ; CHECK-NEXT: 496B %27:gpr = ADDI %26:gpr, target-flags(riscv-lo) @var_47 - ; CHECK-NEXT: 512B PseudoVSSEG4E16_V_M2 %25:vrn4m2nov0, %27:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: 528B PseudoRET - %0:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_49 - %1:gpr = ADDI %0, target-flags(riscv-lo) @__const._Z3foov.var_49 - dead $x0 = PseudoVSETIVLI 2, 73 /* e16, m2, ta, mu */, implicit-def $vl, implicit-def $vtype - undef %25.sub_vrm2_0:vrn4m2nov0 = PseudoVLE16_V_M2 %1, 2, 4 /* e16 */, implicit $vl, implicit $vtype - %3:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_48 - %4:gpr = ADDI %3, target-flags(riscv-lo) @__const._Z3foov.var_48 - %5:vr = PseudoVLE8_V_M1 %4, 2, 3 /* e8 */, implicit $vl, implicit $vtype - %6:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_46 - %7:gpr = ADDI %6, target-flags(riscv-lo) @__const._Z3foov.var_46 - %25.sub_vrm2_1:vrn4m2nov0 = PseudoVLE16_V_M2 %7, 2, 4 /* e16 */, implicit $vl, implicit $vtype - %9:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_45 - %10:gpr = ADDI %9, target-flags(riscv-lo) @__const._Z3foov.var_45 - %25.sub_vrm2_2:vrn4m2nov0 = PseudoVLE16_V_M2 %10, 2, 4 /* e16 */, implicit $vl, implicit $vtype - INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $v0, 12 /* clobber */, implicit-def dead early-clobber $v1, 12 /* clobber */, implicit-def dead early-clobber $v2, 12 /* clobber */, implicit-def dead early-clobber $v3, 12 /* clobber */, implicit-def dead early-clobber $v4, 12 /* clobber */, implicit-def dead early-clobber $v5, 12 /* clobber */, implicit-def dead early-clobber $v6, 12 /* clobber */, implicit-def dead early-clobber $v7, 12 /* clobber */, implicit-def dead early-clobber $v8, 12 /* clobber */, implicit-def dead early-clobber $v9, 12 /* clobber */, implicit-def dead early-clobber $v10, 12 /* clobber */, implicit-def dead early-clobber $v11, 12 /* clobber */, implicit-def dead early-clobber $v12, 12 /* clobber */, implicit-def dead early-clobber $v13, 12 /* clobber */, implicit-def dead early-clobber $v14, 12 /* clobber */, implicit-def dead early-clobber $v15, 12 /* clobber */, implicit-def dead early-clobber $v16, 12 /* clobber */, implicit-def dead early-clobber $v17, 12 /* clobber */, implicit-def dead early-clobber $v18, 12 /* clobber */, implicit-def dead early-clobber $v19, 12 /* clobber */, implicit-def dead early-clobber $v20, 12 /* clobber */, implicit-def dead early-clobber $v21, 12 /* clobber */, implicit-def dead early-clobber $v22, 12 /* clobber */, implicit-def dead early-clobber $v23, 12 /* clobber */, implicit-def dead early-clobber $v24, 12 /* clobber */, implicit-def dead early-clobber $v25, 12 /* clobber */, implicit-def dead early-clobber $v26, 12 /* clobber */, implicit-def dead early-clobber $v27, 12 /* clobber */, implicit-def dead early-clobber $v28, 12 /* clobber */, implicit-def dead early-clobber $v29, 12 /* clobber */, implicit-def dead early-clobber $v30, 12 /* clobber */, implicit-def dead early-clobber $v31 - %12:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_44 - %13:gpr = ADDI %12, target-flags(riscv-lo) @__const._Z3foov.var_44 - dead $x0 = PseudoVSETIVLI 2, 73 /* e16, m2, ta, mu */, implicit-def $vl, implicit-def $vtype - %25.sub_vrm2_3:vrn4m2nov0 = PseudoVLE16_V_M2 %13, 2, 4 /* e16 */, implicit $vl, implicit $vtype - $x0 = PseudoVSETIVLI 2, 73 /* e16, m2, ta, mu */, implicit-def $vl, implicit-def $vtype - %16:gpr = LUI target-flags(riscv-hi) @__const._Z3foov.var_40 - %17:gpr = ADDI %16, target-flags(riscv-lo) 
@__const._Z3foov.var_40 - %18:vrm2 = PseudoVLE16_V_M2 %17, 2, 4 /* e16 */, implicit $vl, implicit $vtype - $x0 = PseudoVSETIVLI 2, 73 /* e16, m2, ta, mu */, implicit-def $vl, implicit-def $vtype - %20:gpr = LUI 1048572 - %21:gpr = ADDIW %20, 928 - early-clobber %22:vr = PseudoVMSBC_VX_M2 %18, %21, 2, 4 /* e16 */, implicit $vl, implicit $vtype - $x0 = PseudoVSETIVLI 2, 9 /* e16, m2, tu, mu */, implicit-def $vl, implicit-def $vtype - $v0 = COPY %22 - early-clobber %25.sub_vrm2_0:vrn4m2nov0 = PseudoVSEXT_VF2_M2_MASK %25.sub_vrm2_0, %5, killed $v0, 2, 4 /* e16 */, 0, implicit $vl, implicit $vtype - ; CHECK: Best local split range: 64r-208r, 6.999861e-03, 3 instrs - ; CHECK-NEXT: enterIntvBefore 64r: not live - ; CHECK-NEXT: leaveIntvAfter 208r: valno 1 - ; CHECK-NEXT: useIntv [64B;216r): [64B;216r):1 - ; CHECK-NEXT: blit [64r,160r:4): [64r;160r)=1(%29)(recalc) - ; CHECK-NEXT: blit [160r,208r:0): [160r;208r)=1(%29)(recalc) - ; CHECK-NEXT: blit [208r,288r:1): [208r;216r)=1(%29)(recalc) [216r;288r)=0(%28)(recalc) - ; CHECK-NEXT: blit [288r,464e:2): [288r;464e)=0(%28)(recalc) - ; CHECK-NEXT: blit [464e,512r:3): [464e;512r)=0(%28)(recalc) - ; CHECK-NEXT: rewr %bb.0 464e:0 early-clobber %28.sub_vrm2_0:vrn4m2nov0 = PseudoVSEXT_VF2_M2_MASK %25.sub_vrm2_0:vrn4m2nov0(tied-def 0), %5:vr, $v0, 2, 4, 0, implicit $vl, implicit $vtype - ; CHECK-NEXT: rewr %bb.0 288r:0 %28.sub_vrm2_3:vrn4m2nov0 = PseudoVLE16_V_M2 %13:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: rewr %bb.0 208r:1 %29.sub_vrm2_2:vrn4m2nov0 = PseudoVLE16_V_M2 %10:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: rewr %bb.0 160r:1 %29.sub_vrm2_1:vrn4m2nov0 = PseudoVLE16_V_M2 %7:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: rewr %bb.0 64r:1 undef %29.sub_vrm2_0:vrn4m2nov0 = PseudoVLE16_V_M2 %1:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: rewr %bb.0 464B:0 early-clobber %28.sub_vrm2_0:vrn4m2nov0 = PseudoVSEXT_VF2_M2_MASK %28.sub_vrm2_0:vrn4m2nov0(tied-def 0), %5:vr, $v0, 2, 4, 0, implicit $vl, implicit $vtype - ; CHECK-NEXT: rewr %bb.0 512B:0 PseudoVSSEG4E16_V_M2 %28:vrn4m2nov0, %27:gpr, 2, 4, implicit $vl, implicit $vtype - ; CHECK-NEXT: rewr %bb.0 216B:1 undef %28.sub_vrm1_0_sub_vrm1_1_sub_vrm1_2_sub_vrm1_3_sub_vrm1_4_sub_vrm1_5:vrn4m2nov0 = COPY %29.sub_vrm1_0_sub_vrm1_1_sub_vrm1_2_sub_vrm1_3_sub_vrm1_4_sub_vrm1_5:vrn4m2nov0 - ; CHECK-NEXT: queuing new interval: %28 [216r,288r:0)[288r,464e:1)[464e,512r:2) 0@216r 1@288r 2@464e L000000000000000C [216r,464e:0)[464e,512r:1) 0@216r 1@464e L0000000000000300 [288r,512r:0) 0@288r L00000000000000C0 [216r,512r:0) 0@216r L0000000000000030 [216r,512r:0) 0@216r weight:8.706897e-03 - %26:gpr = LUI target-flags(riscv-hi) @var_47 - %27:gpr = ADDI %26, target-flags(riscv-lo) @var_47 - PseudoVSSEG4E16_V_M2 %25, %27, 2, 4 /* e16 */, implicit $vl, implicit $vtype - PseudoRET - -... 
diff --git a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir --- a/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir +++ b/llvm/test/CodeGen/RISCV/rvv/addi-scalable-offset.mir @@ -41,7 +41,7 @@ ; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 1 ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12 ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 killed renamable $x10, $noreg, 6 /* e64 */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) + ; CHECK-NEXT: renamable $v8 = PseudoVLE64_V_M1 undef renamable $v8, killed renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load unknown-size from %ir.pa, align 8) ; CHECK-NEXT: $x10 = PseudoReadVLENB ; CHECK-NEXT: $x10 = SLLI killed $x10, 1 ; CHECK-NEXT: $x10 = SUB $x8, killed $x10 @@ -57,7 +57,8 @@ ; CHECK-NEXT: PseudoRET %1:gprnox0 = COPY $x11 %0:gpr = COPY $x10 - %2:vr = PseudoVLE64_V_M1 %0, %1, 6 :: (load unknown-size from %ir.pa, align 8) + %pt:vr = IMPLICIT_DEF + %2:vr = PseudoVLE64_V_M1 %pt, %0, %1, 6, 0 :: (load unknown-size from %ir.pa, align 8) %3:gpr = ADDI %stack.2, 0 VS1R_V killed %2:vr, %3:gpr PseudoRET diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -986,38 +986,38 @@ ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32-NEXT: vsll.vx v9, v8, a0 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: vand.vx v10, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsll.vx v10, v10, a2 -; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 ; RV32-NEXT: mv a3, sp -; RV32-NEXT: vlse64.v v10, (a3), zero -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v11, v8, a3 -; RV32-NEXT: vsll.vi v11, v11, 24 -; RV32-NEXT: vand.vv v12, v8, v10 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v11, v11, v12 -; RV32-NEXT: vor.vv v9, v9, v11 -; RV32-NEXT: vsrl.vx v11, v8, a0 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: vand.vx v12, v12, a1 -; RV32-NEXT: vor.vv v11, v12, v11 +; RV32-NEXT: vlse64.v v11, (a3), zero +; RV32-NEXT: vand.vx v10, v10, a2 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsrl.vi v10, v8, 8 +; RV32-NEXT: vand.vv v10, v10, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsll.vx v10, v8, a0 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: vsll.vx v12, v12, a1 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vi v12, v12, 24 +; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: addi a0, sp, 24 -; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vor.vv v8, v8, v11 -; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vlse64.v v11, (a0), zero +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vand.vv v9, v9, v10 -; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: 
vand.vv v9, v9, v11 +; RV32-NEXT: vand.vv v8, v8, v11 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vlse64.v v10, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 4 @@ -1123,38 +1123,38 @@ ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v10, v8, a0 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: vand.vx v12, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsll.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsrl.vx v10, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 ; RV32-NEXT: mv a3, sp -; RV32-NEXT: vlse64.v v12, (a3), zero -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v14, v8, a3 -; RV32-NEXT: vsll.vi v14, v14, 24 -; RV32-NEXT: vand.vv v16, v8, v12 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v14, v14, v16 -; RV32-NEXT: vor.vv v10, v10, v14 -; RV32-NEXT: vsrl.vx v14, v8, a0 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: vand.vx v16, v16, a1 -; RV32-NEXT: vor.vv v14, v16, v14 +; RV32-NEXT: vlse64.v v14, (a3), zero +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: vand.vv v12, v12, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsll.vx v12, v8, a0 +; RV32-NEXT: vand.vx v16, v8, a2 +; RV32-NEXT: vsll.vx v16, v16, a1 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v16, v8, a3 +; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a0, sp, 24 -; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vor.vv v8, v8, v14 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vlse64.v v14, (a0), zero +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 4 -; RV32-NEXT: vand.vv v10, v10, v12 -; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vand.vv v10, v10, v14 +; RV32-NEXT: vand.vv v8, v8, v14 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vlse64.v v12, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 4 @@ -1260,38 +1260,38 @@ ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v12, v8, a0 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: vand.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsll.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsrl.vx v12, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 ; RV32-NEXT: mv a3, sp -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v20, v8, a3 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: vor.vv v12, v12, v20 -; RV32-NEXT: vsrl.vx v20, v8, a0 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: vand.vx v24, v24, a1 -; RV32-NEXT: vor.vv v20, v24, v20 +; RV32-NEXT: vlse64.v v20, (a3), zero +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vsrl.vi v8, 
v8, 8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsll.vx v16, v8, a0 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: vsll.vx v24, v24, a1 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vx v24, v8, a3 +; RV32-NEXT: vsll.vi v24, v24, 24 +; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a0, sp, 24 -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vor.vv v8, v8, v20 -; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vlse64.v v20, (a0), zero +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: vand.vv v12, v12, v16 -; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v12, v12, v20 +; RV32-NEXT: vand.vv v8, v8, v20 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 4 @@ -1401,42 +1401,42 @@ ; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a0 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: vand.vx v24, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsll.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v16, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v0, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vlse64.v v24, (a3), zero +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v0, v0, v24 ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vi v0, v0, 24 -; RV32-NEXT: vand.vv v16, v8, v24 -; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vand.vx v16, v16, a3 ; RV32-NEXT: vor.vv v16, v0, v16 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v0, v16 +; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v0, v8, a2 -; RV32-NEXT: vand.vx v0, v0, a1 -; RV32-NEXT: vsrl.vx v16, v8, a0 -; RV32-NEXT: vor.vv v0, v0, v16 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v0, v8, a2 +; RV32-NEXT: vsll.vx v0, v0, a1 +; RV32-NEXT: vsll.vx v16, v8, a0 +; RV32-NEXT: vor.vv v0, v16, v0 +; RV32-NEXT: vand.vv v16, v8, v24 ; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: addi a0, sp, 40 ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vor.vv v8, v8, v0 +; RV32-NEXT: vor.vv v8, v0, v8 ; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsrl.vi v24, v8, 4 ; RV32-NEXT: vand.vv v24, v24, v16 ; RV32-NEXT: vand.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -1988,35 +1988,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli 
zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsll.vx v9, v8, a1, v0.t -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v10, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v10, v10, a3, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t +; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v10, v10, a3, v0.t +; RV32-NEXT: vor.vv v9, v10, v9, v0.t +; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4, v0.t -; RV32-NEXT: vsll.vi v10, v10, 24, v0.t +; RV32-NEXT: vand.vx v10, v10, a4, v0.t +; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vlse64.v v12, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11, v0.t -; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t -; RV32-NEXT: vand.vx v12, v12, a2, v0.t -; RV32-NEXT: vor.vv v10, v12, v10, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV32-NEXT: vand.vx v12, v12, a4, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v11, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv v8, v9, v8, v0.t +; RV32-NEXT: vand.vv v11, v11, v12, v0.t +; RV32-NEXT: vor.vv v10, v11, v10, v0.t +; RV32-NEXT: vor.vv v9, v10, v9, v0.t +; RV32-NEXT: vsll.vx v10, v8, a1, v0.t +; RV32-NEXT: vand.vx v11, v8, a3, v0.t +; RV32-NEXT: vsll.vx v11, v11, a2, v0.t +; RV32-NEXT: vor.vv v10, v10, v11, v0.t +; RV32-NEXT: vand.vx v11, v8, a4, v0.t +; RV32-NEXT: vsll.vi v11, v11, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v11, v8, v0.t +; RV32-NEXT: vor.vv v8, v10, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v9, v0.t ; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma @@ -2132,35 +2132,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsll.vx v9, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v10, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v10, v10, a3 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4 -; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: mv a5, sp -; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vsrl.vx v9, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v10, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsrl.vi v10, v8, 8 +; RV32-NEXT: mv a4, sp +; RV32-NEXT: vsetvli a5, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v11, (a4), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: vsrl.vx v12, v8, a3 -; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vand.vv v10, v10, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v12, v12, a4 
-; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v12, v8, a4 +; RV32-NEXT: vsll.vi v12, v12, 24 ; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m1, ta, ma @@ -2280,35 +2280,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v10, v8, a1, v0.t -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v12, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v12, v12, a3, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t +; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v12, v12, a3, v0.t +; RV32-NEXT: vor.vv v10, v12, v10, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v12, v12, 24, v0.t +; RV32-NEXT: vand.vx v12, v12, a4, v0.t +; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v12, v12, v16, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v12, v16, v12, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV32-NEXT: vand.vx v16, v16, a4, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v14, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t +; RV32-NEXT: vand.vv v14, v14, v16, v0.t +; RV32-NEXT: vor.vv v12, v14, v12, v0.t +; RV32-NEXT: vor.vv v10, v12, v10, v0.t +; RV32-NEXT: vsll.vx v12, v8, a1, v0.t +; RV32-NEXT: vand.vx v14, v8, a3, v0.t +; RV32-NEXT: vsll.vx v14, v14, a2, v0.t +; RV32-NEXT: vor.vv v12, v12, v14, v0.t +; RV32-NEXT: vand.vx v14, v8, a4, v0.t +; RV32-NEXT: vsll.vi v14, v14, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v14, v8, v0.t +; RV32-NEXT: vor.vv v8, v12, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: vsrl.vi v10, v8, 4, v0.t ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma @@ -2424,35 +2424,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v10, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v12, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: mv a5, sp -; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vsrl.vx v10, 
v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v12, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: mv a4, sp +; RV32-NEXT: vsetvli a5, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v14, (a4), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: vsrl.vx v16, v8, a3 -; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vand.vv v12, v12, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: vand.vx v16, v8, a3 +; RV32-NEXT: vsll.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v16, v8, a4 +; RV32-NEXT: vsll.vi v16, v16, 24 ; RV32-NEXT: vand.vv v8, v8, v14 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 4 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m2, ta, ma @@ -2572,35 +2572,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v12, v8, a1, v0.t -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v16, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v16, v16, a3, v0.t -; RV32-NEXT: vor.vv v16, v12, v16, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v16, v16, a3, v0.t +; RV32-NEXT: vor.vv v12, v16, v12, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v20, v12, 24, v0.t +; RV32-NEXT: vand.vx v20, v16, a4, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t ; RV32-NEXT: mv a5, sp ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v12, v0.t -; RV32-NEXT: vsll.vi v24, v24, 8, v0.t -; RV32-NEXT: vor.vv v20, v20, v24, v0.t -; RV32-NEXT: vor.vv v16, v16, v20, v0.t -; RV32-NEXT: vsrl.vx v20, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t ; RV32-NEXT: vor.vv v20, v24, v20, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a4, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: vor.vv v8, v8, v20, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v12, v20, v12, v0.t +; RV32-NEXT: vsll.vx v20, v8, a1, v0.t +; RV32-NEXT: vand.vx v24, v8, a3, v0.t +; RV32-NEXT: vsll.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v20, v20, v24, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: 
vor.vv v8, v20, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma @@ -2716,35 +2716,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v16, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: mv a5, sp -; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a5), zero +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v16, v16, a3 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: mv a4, sp +; RV32-NEXT: vsetvli a5, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v20, (a4), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v20 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: vsrl.vx v24, v8, a3 -; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vand.vv v16, v16, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v24, v24, a4 -; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vand.vx v24, v8, a3 +; RV32-NEXT: vsll.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vx v24, v8, a4 +; RV32-NEXT: vsll.vi v24, v24, 24 ; RV32-NEXT: vand.vv v8, v8, v20 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma @@ -2869,73 +2869,73 @@ ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a4, sp, 48 +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: vand.vx v16, v16, a4, v0.t +; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: 
vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v24, (a5), zero ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: addi a5, sp, 48 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: vand.vx v16, v8, a3, v0.t +; RV32-NEXT: vsll.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a4, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma @@ -3076,42 +3076,42 @@ ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: 
vlse64.v v16, (a5), zero +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vand.vx v24, v24, a4 ; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vor.vv v24, v24, v0 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v0, v8, a3 -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v0, v8, a3 +; RV32-NEXT: vsll.vx v0, v0, a2 +; RV32-NEXT: vsll.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma @@ -3239,73 +3239,73 @@ ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a4, sp, 48 +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 48 +; RV32-NEXT: vand.vx v16, v16, a4, v0.t +; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v24, (a5), zero ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a5, sp, 48 +; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; 
RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: addi a5, sp, 48 -; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 4 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 48 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: vand.vx v16, v8, a3, v0.t +; RV32-NEXT: vsll.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a4, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma @@ -3446,42 +3446,42 @@ ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 16 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vand.vx v24, v24, a4 ; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vor.vv v24, v24, v0 ; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size 
Folded Spill -; RV32-NEXT: vsrl.vx v0, v8, a3 -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v0, v8, a3 +; RV32-NEXT: vsll.vx v0, v0, a2 +; RV32-NEXT: vsll.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll @@ -275,33 +275,33 @@ ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma -; RV32-NEXT: vsll.vx v9, v8, a0 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: vand.vx v10, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsll.vx v10, v10, a2 -; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: vsrl.vx v9, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 ; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v10, (a3), zero -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v11, v8, a3 -; RV32-NEXT: vsll.vi v11, v11, 24 -; RV32-NEXT: vand.vv v12, v8, v10 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v11, v11, v12 -; RV32-NEXT: vor.vv v9, v9, v11 -; RV32-NEXT: vsrl.vx v11, v8, a0 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: vand.vx v12, v12, a1 -; RV32-NEXT: vor.vv v11, v12, v11 +; RV32-NEXT: vlse64.v v11, (a3), zero +; RV32-NEXT: vand.vx v10, v10, a2 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsrl.vi v10, v8, 8 +; RV32-NEXT: vand.vv v10, v10, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: vand.vx v12, v12, a3 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v11 -; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsll.vx v10, v8, a0 +; RV32-NEXT: vand.vx v12, v8, a2 +; RV32-NEXT: vsll.vx v12, v12, a1 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vi v12, v12, 24 +; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -352,33 +352,33 @@ ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v10, v8, a0 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: vand.vx v12, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsll.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vsrl.vx v10, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 ; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v12, (a3), zero -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx 
v14, v8, a3 -; RV32-NEXT: vsll.vi v14, v14, 24 -; RV32-NEXT: vand.vv v16, v8, v12 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v14, v14, v16 -; RV32-NEXT: vor.vv v10, v10, v14 -; RV32-NEXT: vsrl.vx v14, v8, a0 -; RV32-NEXT: vsrl.vx v16, v8, a2 -; RV32-NEXT: vand.vx v16, v16, a1 -; RV32-NEXT: vor.vv v14, v16, v14 +; RV32-NEXT: vlse64.v v14, (a3), zero +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: vand.vv v12, v12, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: vand.vx v16, v16, a3 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v14 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsll.vx v12, v8, a0 +; RV32-NEXT: vand.vx v16, v8, a2 +; RV32-NEXT: vsll.vx v16, v16, a1 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v16, v8, a3 +; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -429,33 +429,33 @@ ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v12, v8, a0 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: vand.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsll.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vsrl.vx v12, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 ; RV32-NEXT: addi a3, sp, 8 -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v20, v8, a3 -; RV32-NEXT: vsll.vi v20, v20, 24 -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: vor.vv v12, v12, v20 -; RV32-NEXT: vsrl.vx v20, v8, a0 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: vand.vx v24, v24, a1 -; RV32-NEXT: vor.vv v20, v24, v20 +; RV32-NEXT: vlse64.v v20, (a3), zero +; RV32-NEXT: vand.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vand.vv v16, v16, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vsrl.vi v8, v8, 8 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v20 -; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsll.vx v16, v8, a0 +; RV32-NEXT: vand.vx v24, v8, a2 +; RV32-NEXT: vsll.vx v24, v24, a1 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vx v24, v8, a3 +; RV32-NEXT: vsll.vi v24, v24, 24 +; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -510,40 +510,40 @@ ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a0 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -256 -; RV32-NEXT: vand.vx v24, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsll.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx 
v24, v8, a0 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: vsrl.vx v0, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 ; RV32-NEXT: addi a3, sp, 8 ; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vand.vx v0, v0, a2 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: vand.vv v0, v0, v16 ; RV32-NEXT: lui a3, 4080 -; RV32-NEXT: vand.vx v0, v8, a3 -; RV32-NEXT: vsll.vi v0, v0, 24 -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vand.vx v24, v24, a3 ; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vor.vv v24, v24, v0 ; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v0, v8, a2 -; RV32-NEXT: vand.vx v0, v0, a1 -; RV32-NEXT: vsrl.vx v24, v8, a0 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v0, v8, a2 +; RV32-NEXT: vsll.vx v0, v0, a1 +; RV32-NEXT: vsll.vx v24, v8, a0 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 ; RV32-NEXT: vand.vx v8, v8, a3 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -564,35 +564,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsll.vx v9, v8, a1, v0.t -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v10, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v10, v10, a3, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t +; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v10, v10, a3, v0.t +; RV32-NEXT: vor.vv v9, v10, v9, v0.t +; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4, v0.t -; RV32-NEXT: vsll.vi v10, v10, 24, v0.t +; RV32-NEXT: vand.vx v10, v10, a4, v0.t +; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vlse64.v v12, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11, v0.t -; RV32-NEXT: vsll.vi v12, v12, 8, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vor.vv v9, v9, v10, v0.t -; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a3, v0.t -; RV32-NEXT: vand.vx v12, v12, a2, v0.t -; RV32-NEXT: vor.vv v10, v12, v10, v0.t -; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t -; RV32-NEXT: vand.vx v12, v12, a4, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v11, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v10, v0.t -; RV32-NEXT: vor.vv 
v8, v9, v8, v0.t +; RV32-NEXT: vand.vv v11, v11, v12, v0.t +; RV32-NEXT: vor.vv v10, v11, v10, v0.t +; RV32-NEXT: vor.vv v9, v10, v9, v0.t +; RV32-NEXT: vsll.vx v10, v8, a1, v0.t +; RV32-NEXT: vand.vx v11, v8, a3, v0.t +; RV32-NEXT: vsll.vx v11, v11, a2, v0.t +; RV32-NEXT: vor.vv v10, v10, v11, v0.t +; RV32-NEXT: vand.vx v11, v8, a4, v0.t +; RV32-NEXT: vsll.vi v11, v11, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v12, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v11, v8, v0.t +; RV32-NEXT: vor.vv v8, v10, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v9, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -642,35 +642,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsll.vx v9, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v10, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v10, v10, a3 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v10, v8, a4 -; RV32-NEXT: vsll.vi v10, v10, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a5), zero +; RV32-NEXT: vsrl.vx v9, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v10, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsrl.vi v10, v8, 8 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v11, (a4), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v12, v8, v11 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: vsrl.vx v12, v8, a3 -; RV32-NEXT: vand.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vand.vv v10, v10, v11 ; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v12, v12, a0 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: vand.vx v12, v8, a3 +; RV32-NEXT: vsll.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vsll.vi v12, v12, 24 ; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v8, v10 -; RV32-NEXT: vor.vv v8, v9, v8 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -724,35 +724,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v10, v8, a1, v0.t -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v12, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v12, v12, a3, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t +; RV32-NEXT: vsrl.vx v10, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v12, v12, a3, v0.t +; RV32-NEXT: vor.vv v10, v12, v10, v0.t +; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v12, v12, 24, v0.t +; RV32-NEXT: vand.vx v12, v12, a4, v0.t +; RV32-NEXT: vsrl.vi v14, v8, 8, v0.t ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; 
RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t -; RV32-NEXT: vor.vv v12, v12, v16, v0.t -; RV32-NEXT: vor.vv v10, v10, v12, v0.t -; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v12, v16, v12, v0.t -; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t -; RV32-NEXT: vand.vx v16, v16, a4, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v14, v0.t -; RV32-NEXT: vor.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v10, v8, v0.t +; RV32-NEXT: vand.vv v14, v14, v16, v0.t +; RV32-NEXT: vor.vv v12, v14, v12, v0.t +; RV32-NEXT: vor.vv v10, v12, v10, v0.t +; RV32-NEXT: vsll.vx v12, v8, a1, v0.t +; RV32-NEXT: vand.vx v14, v8, a3, v0.t +; RV32-NEXT: vsll.vx v14, v14, a2, v0.t +; RV32-NEXT: vor.vv v12, v12, v14, v0.t +; RV32-NEXT: vand.vx v14, v8, a4, v0.t +; RV32-NEXT: vsll.vi v14, v14, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v14, v8, v0.t +; RV32-NEXT: vor.vv v8, v12, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v10, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -802,35 +802,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v10, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v12, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v12, v12, a3 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a5), zero +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v12, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v12, v12, a3 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v14, (a4), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v16, v8, v14 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: vsrl.vx v16, v8, a3 -; RV32-NEXT: vand.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vand.vv v12, v12, v14 ; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: vand.vx v16, v8, a3 +; RV32-NEXT: vsll.vx v16, v16, a2 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vsll.vi v16, v16, 24 ; RV32-NEXT: vand.vv v8, v8, v14 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -884,35 +884,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v12, v8, a1, v0.t -; RV32-NEXT: lui a2, 16 -; 
RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v16, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v16, v16, a3, v0.t -; RV32-NEXT: vor.vv v16, v12, v16, v0.t +; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v16, v16, a3, v0.t +; RV32-NEXT: vor.vv v12, v16, v12, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v8, a4, v0.t -; RV32-NEXT: vsll.vi v20, v12, 24, v0.t +; RV32-NEXT: vand.vx v20, v16, a4, v0.t +; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v12, v0.t -; RV32-NEXT: vsll.vi v24, v24, 8, v0.t -; RV32-NEXT: vor.vv v20, v20, v24, v0.t -; RV32-NEXT: vor.vv v16, v16, v20, v0.t -; RV32-NEXT: vsrl.vx v20, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t -; RV32-NEXT: vand.vx v24, v24, a2, v0.t +; RV32-NEXT: vand.vv v24, v24, v16, v0.t ; RV32-NEXT: vor.vv v20, v24, v20, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a4, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t -; RV32-NEXT: vand.vv v8, v8, v12, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t -; RV32-NEXT: vor.vv v8, v8, v20, v0.t -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v12, v20, v12, v0.t +; RV32-NEXT: vsll.vx v20, v8, a1, v0.t +; RV32-NEXT: vand.vx v24, v8, a3, v0.t +; RV32-NEXT: vsll.vx v24, v24, a2, v0.t +; RV32-NEXT: vor.vv v20, v20, v24, v0.t +; RV32-NEXT: vand.vx v24, v8, a4, v0.t +; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t +; RV32-NEXT: vor.vv v8, v20, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -962,35 +962,35 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v16, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v16, v16, a3 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a5), zero +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v16, v16, a3 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v20, (a4), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v24, v8, v20 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: vsrl.vx v24, v8, a3 -; RV32-NEXT: vand.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vand.vv v16, v16, v20 ; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vand.vx v24, v24, a4 -; RV32-NEXT: vsrl.vi v8, v8, 8 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vand.vx v24, v24, a0 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsll.vx v16, v8, 
a1 +; RV32-NEXT: vand.vx v24, v8, a3 +; RV32-NEXT: vsll.vx v24, v24, a2 +; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vx v24, v8, a0 +; RV32-NEXT: vsll.vi v24, v24, 24 ; RV32-NEXT: vand.vv v8, v8, v20 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -1049,73 +1049,73 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vand.vx v16, v16, a4, v0.t +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v24, (a5), zero ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: vand.vx v16, v8, a3, v0.t +; RV32-NEXT: vsll.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a4, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: 
vsll.vi v16, v16, 24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 @@ -1190,41 +1190,42 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vand.vx v24, v24, a0 ; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v0, v8, a3 +; RV32-NEXT: vsll.vx v0, v0, a2 +; RV32-NEXT: vsll.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vx v8, v8, a0 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v0, v8, a3 -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -1286,73 +1287,73 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; 
RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2, v0.t -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t -; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2, v0.t +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: csrr a5, vlenb -; RV32-NEXT: slli a5, a5, 4 -; RV32-NEXT: add a5, sp, a5 -; RV32-NEXT: addi a5, a5, 16 +; RV32-NEXT: vand.vx v16, v16, a4, v0.t +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v24, (a5), zero ; RV32-NEXT: csrr a5, vlenb ; RV32-NEXT: slli a5, a5, 3 ; RV32-NEXT: add a5, sp, a5 ; RV32-NEXT: addi a5, a5, 16 -; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v16, v16, v24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v24, v8, a1, v0.t -; RV32-NEXT: vsrl.vx v16, v8, a3, v0.t -; RV32-NEXT: vand.vx v16, v16, a2, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vsll.vx v24, v8, a1, v0.t +; RV32-NEXT: vand.vx v16, v8, a3, v0.t +; RV32-NEXT: vsll.vx v16, v16, a2, v0.t +; RV32-NEXT: vor.vv v16, v24, v16, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a4, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vand.vx v16, v8, a4, v0.t +; RV32-NEXT: vsll.vi v16, v16, 24, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vor.vv v8, v8, v24, v0.t +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vsll.vi v8, v8, 8, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: 
vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 @@ -1427,41 +1428,42 @@ ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v0, v16, 24 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vsrl.vi v0, v8, 8 +; RV32-NEXT: addi a4, sp, 8 +; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a4), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vand.vx v24, v24, a0 ; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v0, v8, a3 +; RV32-NEXT: vsll.vx v0, v0, a2 +; RV32-NEXT: vsll.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vx v8, v8, a0 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v0, v8, a3 -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll @@ -2472,13 +2472,12 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 56 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb -; RV32-NEXT: vmv1r.v v1, v0 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 
0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 +; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 @@ -2486,7 +2485,7 @@ ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a2, a1, 3 ; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a2 +; RV32-NEXT: vslidedown.vx v24, v0, a2 ; RV32-NEXT: lui a2, 349525 ; RV32-NEXT: addi a2, a2, 1365 ; RV32-NEXT: sw a2, 44(sp) @@ -2503,166 +2502,186 @@ ; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a2, 20(sp) ; RV32-NEXT: sw a2, 16(sp) -; RV32-NEXT: sub a2, a0, a1 -; RV32-NEXT: sltu a3, a0, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a0, a1, .LBB46_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: .LBB46_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a5, 40 +; RV32-NEXT: mul a4, a4, a5 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v16, v24, v16, v0.t +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 
48 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v24, v16, v0.t -; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v16, v24, v16, v0.t +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v8, v16, a2, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a1, .LBB46_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB46_2: +; RV32-NEXT: sub a1, a0, a1 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vmv1r.v v0, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vs8r.v v8, 
(a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vsub.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t +; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 @@ -2762,9 +2781,15 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; 
RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -2778,67 +2803,73 @@ ; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 -; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: addi a2, a1, 257 +; RV32-NEXT: sw a2, 20(sp) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: sub a2, a0, a1 -; RV32-NEXT: sltu a3, a0, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bltu a0, a1, .LBB47_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: .LBB47_2: ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1 +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a3, sp, 40 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vsub.vv v16, v16, v24 +; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: addi a3, sp, 32 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v0, (a3), zero ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v16, v0 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: vsrl.vi v24, v16, 4 -; RV32-NEXT: vadd.vv v24, v16, v24 +; RV32-NEXT: vand.vv v16, v8, v0 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v8, v16 ; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a3), zero +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: addi a3, sp, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vmul.vv v16, v16, v24 +; RV32-NEXT: vmul.vv v16, v16, v8 ; RV32-NEXT: li a2, 56 -; RV32-NEXT: vsrl.vx v16, v16, a2 -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: bltu a0, a1, .LBB47_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: .LBB47_2: +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: sub a1, a0, a1 +; RV32-NEXT: sltu a0, a0, a1 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli 
zero, a0, e64, m8, ta, ma +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 @@ -2847,11 +2878,11 @@ ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v0 -; RV32-NEXT: vadd.vv v8, v16, v8 +; RV32-NEXT: vsub.vv v16, v8, v16 +; RV32-NEXT: vand.vv v8, v16, v0 +; RV32-NEXT: vsrl.vi v16, v16, 2 +; RV32-NEXT: vand.vv v16, v16, v0 +; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb @@ -2860,17 +2891,18 @@ ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vsrl.vx v16, v8, a2 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vsrl.vx v8, v8, a2 -; RV32-NEXT: addi a0, sp, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-reverse.ll @@ -72,10 +72,10 @@ define <32 x i1> @reverse_v32i1(<32 x i1> %a) { ; CHECK-LABEL: reverse_v32i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli zero, a0, e8, m2, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0) +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 @@ -89,10 +89,10 @@ define <64 x i1> @reverse_v64i1(<64 x i1> %a) { ; CHECK-LABEL: reverse_v64i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 64 -; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0) +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-trunc-vp.ll @@ -247,39 +247,41 @@ ; CHECK-NEXT: vslidedown.vi v3, v0, 8 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v2, v0, 4 -; CHECK-NEXT: vslidedown.vi v27, v3, 4 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v27, 2 ; CHECK-NEXT: addi a2, a1, 512 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: addi a3, a1, 640 -; CHECK-NEXT: vle64.v v8, (a3) -; 
CHECK-NEXT: addi a3, a7, -64 -; CHECK-NEXT: sltu a4, a7, a3 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a4, a4, a3 -; CHECK-NEXT: addi a3, a4, -32 -; CHECK-NEXT: sltu a5, a4, a3 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a3, a5, a3 -; CHECK-NEXT: addi a5, a3, -16 -; CHECK-NEXT: sltu a6, a3, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: vle64.v v16, (a2) +; CHECK-NEXT: vle64.v v8, (a2) ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a6, 40 -; CHECK-NEXT: mul a2, a2, a6 +; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v27, v3, 4 +; CHECK-NEXT: addi a2, a1, 640 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a2) +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: addi a2, a7, -64 +; CHECK-NEXT: sltu a3, a7, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a4, a3, a2 +; CHECK-NEXT: addi a2, a4, -32 +; CHECK-NEXT: sltu a3, a4, a2 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a2 +; CHECK-NEXT: addi a2, a3, -16 +; CHECK-NEXT: sltu a5, a3, a2 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a2, a5, a2 +; CHECK-NEXT: vslidedown.vi v0, v27, 2 +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: addi a5, a1, 128 ; CHECK-NEXT: bltu a3, a2, .LBB16_2 @@ -289,7 +291,7 @@ ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v4, v2, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a5) +; CHECK-NEXT: vle64.v v16, (a5) ; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma ; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: vmv1r.v v0, v27 @@ -298,21 +300,21 @@ ; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload -; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t +; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload +; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t ; CHECK-NEXT: csrr a5, vlenb ; CHECK-NEXT: li a6, 48 ; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a7, a3, .LBB16_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a7, 64 ; CHECK-NEXT: .LBB16_4: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: addi a5, a7, -32 ; CHECK-NEXT: sltu a6, a7, a5 ; CHECK-NEXT: addi a6, a6, -1 @@ -323,9 +325,9 @@ ; CHECK-NEXT: and a6, t0, a6 ; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v4 -; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t +; CHECK-NEXT: vnsrl.wi v24, v16, 0, v0.t ; CHECK-NEXT: addi a6, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a5, a2, .LBB16_6 ; 
CHECK-NEXT: # %bb.5: ; CHECK-NEXT: li a5, 16 @@ -334,13 +336,13 @@ ; CHECK-NEXT: addi a1, a1, 256 ; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma ; CHECK-NEXT: vmv1r.v v0, v2 -; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t +; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: csrr a5, vlenb ; CHECK-NEXT: li t0, 40 ; CHECK-NEXT: mul a5, a5, t0 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a4, a3, .LBB16_8 ; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: li a4, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse.ll @@ -85,10 +85,10 @@ ; RV32-NEXT: vand.vx v9, v9, a1 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a1 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsll.vi v10, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -124,10 +124,10 @@ ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a1 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsll.vi v10, v8, 24 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vsll.vi v8, v8, 8 +; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vsrl.vi v9, v8, 4 ; RV64-NEXT: lui a1, 61681 @@ -484,10 +484,10 @@ ; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vand.vx v12, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v8, 24 +; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV32-NEXT: lui a1, 61681 @@ -523,10 +523,10 @@ ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 +; LMULMAX2-RV64-NEXT: vsll.vi v12, v8, 24 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 8 +; LMULMAX2-RV64-NEXT: vor.vv v8, v12, v8 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX2-RV64-NEXT: lui a1, 61681 @@ -564,10 +564,10 @@ ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vx v11, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 24 +; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 +; LMULMAX1-RV32-NEXT: vor.vv v8, v11, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 ; 
LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV32-NEXT: lui a3, 61681 @@ -594,10 +594,10 @@ ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v9, 24 +; LMULMAX1-RV32-NEXT: vand.vx v9, v9, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 +; LMULMAX1-RV32-NEXT: vor.vv v9, v11, v9 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a3 @@ -630,10 +630,10 @@ ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 24 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 8 +; LMULMAX1-RV64-NEXT: vor.vv v8, v11, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v8, 4 ; LMULMAX1-RV64-NEXT: lui a3, 61681 @@ -660,10 +660,10 @@ ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 24 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 8 +; LMULMAX1-RV64-NEXT: vor.vv v9, v11, v9 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 4 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a3 @@ -704,30 +704,30 @@ ; LMULMAX2-RV32-NEXT: addi a3, a3, -256 ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: lui a4, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4 -; LMULMAX2-RV32-NEXT: li a5, 85 +; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 +; LMULMAX2-RV32-NEXT: li a4, 85 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v0, a5 +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a4 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 -; LMULMAX2-RV32-NEXT: lui a5, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a5, v0 +; LMULMAX2-RV32-NEXT: lui a4, 1044480 +; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a4, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v14 -; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12 +; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v14 +; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 24 +; LMULMAX2-RV32-NEXT: lui a4, 4080 +; LMULMAX2-RV32-NEXT: vand.vx v16, v16, a4 +; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 ; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 ; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 ; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 ; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a4 -; LMULMAX2-RV32-NEXT: vsll.vi v16, v16, 24 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8 +; LMULMAX2-RV32-NEXT: vand.vv v14, 
v8, v14 +; LMULMAX2-RV32-NEXT: vsll.vi v14, v14, 8 +; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v14 ; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap.ll @@ -33,10 +33,10 @@ ; RV32-NEXT: vand.vx v9, v9, a1 ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vand.vx v10, v8, a1 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vsll.vi v10, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a1 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: ret @@ -51,10 +51,10 @@ ; RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsrl.vi v10, v8, 24 ; RV64-NEXT: vor.vv v9, v9, v10 -; RV64-NEXT: vand.vx v10, v8, a1 -; RV64-NEXT: vsll.vi v10, v10, 8 -; RV64-NEXT: vsll.vi v8, v8, 24 -; RV64-NEXT: vor.vv v8, v8, v10 +; RV64-NEXT: vsll.vi v10, v8, 24 +; RV64-NEXT: vand.vx v8, v8, a1 +; RV64-NEXT: vsll.vi v8, v8, 8 +; RV64-NEXT: vor.vv v8, v10, v8 ; RV64-NEXT: vor.vv v8, v8, v9 ; RV64-NEXT: vse32.v v8, (a0) ; RV64-NEXT: ret @@ -219,10 +219,10 @@ ; LMULMAX2-RV32-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV32-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV32-NEXT: vand.vx v12, v8, a1 -; LMULMAX2-RV32-NEXT: vsll.vi v12, v12, 8 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: vsll.vi v12, v8, 24 +; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 +; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse32.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret @@ -237,10 +237,10 @@ ; LMULMAX2-RV64-NEXT: vand.vx v10, v10, a1 ; LMULMAX2-RV64-NEXT: vsrl.vi v12, v8, 24 ; LMULMAX2-RV64-NEXT: vor.vv v10, v10, v12 -; LMULMAX2-RV64-NEXT: vand.vx v12, v8, a1 -; LMULMAX2-RV64-NEXT: vsll.vi v12, v12, 8 -; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v12 +; LMULMAX2-RV64-NEXT: vsll.vi v12, v8, 24 +; LMULMAX2-RV64-NEXT: vand.vx v8, v8, a1 +; LMULMAX2-RV64-NEXT: vsll.vi v8, v8, 8 +; LMULMAX2-RV64-NEXT: vor.vv v8, v12, v8 ; LMULMAX2-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vse32.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret @@ -257,19 +257,19 @@ ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vx v11, v8, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v11 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v8, 24 +; LMULMAX1-RV32-NEXT: vand.vx v8, v8, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v8, v8, 8 +; LMULMAX1-RV32-NEXT: vor.vv v8, v11, v8 ; LMULMAX1-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 8 ; LMULMAX1-RV32-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV32-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV32-NEXT: vand.vx v11, v9, a2 -; LMULMAX1-RV32-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 24 -; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: vsll.vi v11, v9, 24 +; 
LMULMAX1-RV32-NEXT: vand.vx v9, v9, a2 +; LMULMAX1-RV32-NEXT: vsll.vi v9, v9, 8 +; LMULMAX1-RV32-NEXT: vor.vv v9, v11, v9 ; LMULMAX1-RV32-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) @@ -287,19 +287,19 @@ ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v8, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vand.vx v11, v8, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 24 -; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v11 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v8, 24 +; LMULMAX1-RV64-NEXT: vand.vx v8, v8, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v8, v8, 8 +; LMULMAX1-RV64-NEXT: vor.vv v8, v11, v8 ; LMULMAX1-RV64-NEXT: vor.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vsrl.vi v10, v9, 8 ; LMULMAX1-RV64-NEXT: vand.vx v10, v10, a2 ; LMULMAX1-RV64-NEXT: vsrl.vi v11, v9, 24 ; LMULMAX1-RV64-NEXT: vor.vv v10, v10, v11 -; LMULMAX1-RV64-NEXT: vand.vx v11, v9, a2 -; LMULMAX1-RV64-NEXT: vsll.vi v11, v11, 8 -; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 24 -; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v11 +; LMULMAX1-RV64-NEXT: vsll.vi v11, v9, 24 +; LMULMAX1-RV64-NEXT: vand.vx v9, v9, a2 +; LMULMAX1-RV64-NEXT: vsll.vi v9, v9, 8 +; LMULMAX1-RV64-NEXT: vor.vv v9, v11, v9 ; LMULMAX1-RV64-NEXT: vor.vv v9, v9, v10 ; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) ; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) @@ -325,30 +325,30 @@ ; LMULMAX2-RV32-NEXT: addi a3, a3, -256 ; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a3 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 24 -; LMULMAX2-RV32-NEXT: lui a4, 4080 -; LMULMAX2-RV32-NEXT: vand.vx v12, v12, a4 -; LMULMAX2-RV32-NEXT: li a5, 85 +; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 8 +; LMULMAX2-RV32-NEXT: li a4, 85 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v0, a5 +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a4 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 -; LMULMAX2-RV32-NEXT: lui a5, 1044480 -; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a5, v0 +; LMULMAX2-RV32-NEXT: lui a4, 1044480 +; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a4, v0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 8 -; LMULMAX2-RV32-NEXT: vand.vv v16, v16, v14 -; LMULMAX2-RV32-NEXT: vor.vv v12, v16, v12 +; LMULMAX2-RV32-NEXT: vand.vv v12, v12, v14 +; LMULMAX2-RV32-NEXT: vsrl.vi v16, v8, 24 +; LMULMAX2-RV32-NEXT: lui a4, 4080 +; LMULMAX2-RV32-NEXT: vand.vx v16, v16, a4 +; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 ; LMULMAX2-RV32-NEXT: vor.vv v10, v12, v10 ; LMULMAX2-RV32-NEXT: vsll.vx v12, v8, a1 ; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a3 ; LMULMAX2-RV32-NEXT: vsll.vx v16, v16, a2 ; LMULMAX2-RV32-NEXT: vor.vv v12, v12, v16 -; LMULMAX2-RV32-NEXT: vand.vx v16, v8, a4 -; LMULMAX2-RV32-NEXT: vsll.vi v16, v16, 24 -; LMULMAX2-RV32-NEXT: vand.vv v8, v8, v14 -; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 8 -; LMULMAX2-RV32-NEXT: vor.vv v8, v16, v8 +; LMULMAX2-RV32-NEXT: vand.vv v14, v8, v14 +; LMULMAX2-RV32-NEXT: vsll.vi v14, v14, 8 +; LMULMAX2-RV32-NEXT: vand.vx v8, v8, a4 +; LMULMAX2-RV32-NEXT: vsll.vi v8, v8, 24 +; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v14 ; LMULMAX2-RV32-NEXT: vor.vv v8, v12, v8 ; LMULMAX2-RV32-NEXT: vor.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse64.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -264,13 +264,13 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v9, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX2-RV32-NEXT: vand.vv v9, v10, v9 +; LMULMAX2-RV32-NEXT: vand.vv v9, v9, v10 ; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v9 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 @@ -340,13 +340,13 @@ ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) +; LMULMAX1-RV32-NEXT: vsrl.vi v9, v8, 1 ; LMULMAX1-RV32-NEXT: lui a1, 349525 ; LMULMAX1-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v9, a1 +; LMULMAX1-RV32-NEXT: vmv.v.x v10, a1 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 -; LMULMAX1-RV32-NEXT: vand.vv v9, v10, v9 +; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 ; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v9 ; LMULMAX1-RV32-NEXT: lui a1, 209715 ; LMULMAX1-RV32-NEXT: addi a1, a1, 819 @@ -772,13 +772,13 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle64.v v8, (a0) +; LMULMAX2-RV32-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX2-RV32-NEXT: lui a1, 349525 ; LMULMAX2-RV32-NEXT: addi a1, a1, 1365 ; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v10, a1 +; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32-NEXT: vsrl.vi v12, v8, 1 -; LMULMAX2-RV32-NEXT: vand.vv v10, v12, v10 +; LMULMAX2-RV32-NEXT: vand.vv v10, v10, v12 ; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: lui a1, 209715 ; LMULMAX2-RV32-NEXT: addi a1, a1, 819 @@ -847,56 +847,56 @@ ; LMULMAX1-RV32-LABEL: ctpop_v4i64: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) +; LMULMAX1-RV32-NEXT: vle64.v v8, (a1) +; LMULMAX1-RV32-NEXT: vle64.v v9, (a0) +; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 1 ; LMULMAX1-RV32-NEXT: lui a2, 349525 ; LMULMAX1-RV32-NEXT: addi a2, a2, 1365 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vsrl.vi v11, v9, 1 -; LMULMAX1-RV32-NEXT: vand.vv v11, v11, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: vand.vv v10, v10, v11 +; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 ; LMULMAX1-RV32-NEXT: lui a2, 209715 ; LMULMAX1-RV32-NEXT: addi a2, a2, 819 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmv.v.x v11, a2 +; LMULMAX1-RV32-NEXT: vmv.v.x v10, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v12, v9, v11 -; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v12, v9 -; LMULMAX1-RV32-NEXT: vsrl.vi v12, v9, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: 
vand.vv v12, v8, v10 +; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v10 +; LMULMAX1-RV32-NEXT: vadd.vv v8, v12, v8 +; LMULMAX1-RV32-NEXT: vsrl.vi v12, v8, 4 +; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v12 ; LMULMAX1-RV32-NEXT: lui a2, 61681 ; LMULMAX1-RV32-NEXT: addi a2, a2, -241 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.x v12, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v12 ; LMULMAX1-RV32-NEXT: lui a2, 4112 ; LMULMAX1-RV32-NEXT: addi a2, a2, 257 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX1-RV32-NEXT: vmv.v.x v13, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v13 -; LMULMAX1-RV32-NEXT: li a2, 56 -; LMULMAX1-RV32-NEXT: vsrl.vx v9, v9, a2 -; LMULMAX1-RV32-NEXT: vsrl.vi v14, v8, 1 -; LMULMAX1-RV32-NEXT: vand.vv v10, v14, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vand.vv v10, v8, v11 -; LMULMAX1-RV32-NEXT: vsrl.vi v8, v8, 2 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX1-RV32-NEXT: vsrl.vi v10, v8, 4 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vand.vv v8, v8, v12 ; LMULMAX1-RV32-NEXT: vmul.vv v8, v8, v13 +; LMULMAX1-RV32-NEXT: li a2, 56 ; LMULMAX1-RV32-NEXT: vsrl.vx v8, v8, a2 -; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) +; LMULMAX1-RV32-NEXT: vsrl.vi v14, v9, 1 +; LMULMAX1-RV32-NEXT: vand.vv v11, v14, v11 +; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: vand.vv v11, v9, v10 +; LMULMAX1-RV32-NEXT: vsrl.vi v9, v9, 2 +; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v10 +; LMULMAX1-RV32-NEXT: vadd.vv v9, v11, v9 +; LMULMAX1-RV32-NEXT: vsrl.vi v10, v9, 4 +; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v10 +; LMULMAX1-RV32-NEXT: vand.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: vmul.vv v9, v9, v13 +; LMULMAX1-RV32-NEXT: vsrl.vx v9, v9, a2 +; LMULMAX1-RV32-NEXT: vse64.v v9, (a0) +; LMULMAX1-RV32-NEXT: vse64.v v8, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: ctpop_v4i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -522,20 +522,20 @@ ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.i v9, 0 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v9 -; LMULMAX2-RV32F-NEXT: vsub.vv v9, v9, v8 -; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV32F-NEXT: vsub.vv v10, v9, v8 +; LMULMAX2-RV32F-NEXT: vand.vv v10, v8, v10 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v9, v8 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v11, v10 ; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v9, 23 +; LMULMAX2-RV32F-NEXT: vsrl.vi v10, v11, 23 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v9, v8 +; LMULMAX2-RV32F-NEXT: vzext.vf2 v11, v10 ; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v8, v9, a1 +; LMULMAX2-RV32F-NEXT: vsub.vx v10, v11, a1 +; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v9 ; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v10, a1, v0 ; 
LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32F-NEXT: ret ; @@ -566,18 +566,18 @@ ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX2-RV32D-NEXT: vmv.v.i v9, 0 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v9 -; LMULMAX2-RV32D-NEXT: vsub.vv v9, v9, v8 -; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v9 +; LMULMAX2-RV32D-NEXT: vsub.vv v10, v9, v8 +; LMULMAX2-RV32D-NEXT: vand.vv v10, v8, v10 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v10, v10 ; LMULMAX2-RV32D-NEXT: fsrm a1 ; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX2-RV32D-NEXT: vsrl.vx v10, v10, a1 ; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 +; LMULMAX2-RV32D-NEXT: vsub.vx v10, v10, a1 +; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v9 ; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 +; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v10, a1, v0 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32D-NEXT: ret ; @@ -607,18 +607,18 @@ ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; LMULMAX8-RV32-NEXT: vmv.v.i v9, 0 ; LMULMAX8-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v9 -; LMULMAX8-RV32-NEXT: vsub.vv v9, v9, v8 -; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v9 +; LMULMAX8-RV32-NEXT: vsub.vv v10, v9, v8 +; LMULMAX8-RV32-NEXT: vand.vv v10, v8, v10 ; LMULMAX8-RV32-NEXT: fsrmi a1, 1 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v10, v10 ; LMULMAX8-RV32-NEXT: fsrm a1 ; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX8-RV32-NEXT: vsrl.vx v10, v10, a1 ; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v8, v8, a1 +; LMULMAX8-RV32-NEXT: vsub.vx v10, v10, a1 +; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v9 ; LMULMAX8-RV32-NEXT: li a1, 64 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 +; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v10, a1, v0 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV32-NEXT: ret ; @@ -1173,20 +1173,20 @@ ; LMULMAX2-RV32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32F-NEXT: vmv.v.i v10, 0 ; LMULMAX2-RV32F-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v10 -; LMULMAX2-RV32F-NEXT: vsub.vv v10, v10, v8 -; LMULMAX2-RV32F-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV32F-NEXT: vsub.vv v12, v10, v8 +; LMULMAX2-RV32F-NEXT: vand.vv v12, v8, v12 ; LMULMAX2-RV32F-NEXT: fsrmi a1, 1 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v10, v8 +; LMULMAX2-RV32F-NEXT: vfncvt.f.xu.w v14, v12 ; LMULMAX2-RV32F-NEXT: fsrm a1 -; LMULMAX2-RV32F-NEXT: vsrl.vi v8, v10, 23 +; LMULMAX2-RV32F-NEXT: vsrl.vi v12, v14, 23 ; LMULMAX2-RV32F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; LMULMAX2-RV32F-NEXT: vzext.vf2 v10, v8 +; LMULMAX2-RV32F-NEXT: vzext.vf2 v14, v12 ; LMULMAX2-RV32F-NEXT: li a1, 127 -; LMULMAX2-RV32F-NEXT: vsub.vx v8, v10, a1 +; LMULMAX2-RV32F-NEXT: vsub.vx v12, v14, a1 +; LMULMAX2-RV32F-NEXT: vmseq.vv v0, v8, v10 ; LMULMAX2-RV32F-NEXT: li a1, 64 -; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v8, a1, v0 +; LMULMAX2-RV32F-NEXT: vmerge.vxm v8, v12, a1, v0 ; LMULMAX2-RV32F-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32F-NEXT: ret ; @@ -1217,18 +1217,18 @@ ; LMULMAX2-RV32D-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-RV32D-NEXT: vmv.v.i v10, 0 ; LMULMAX2-RV32D-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV32D-NEXT: vmseq.vv 
v0, v8, v10 -; LMULMAX2-RV32D-NEXT: vsub.vv v10, v10, v8 -; LMULMAX2-RV32D-NEXT: vand.vv v8, v8, v10 +; LMULMAX2-RV32D-NEXT: vsub.vv v12, v10, v8 +; LMULMAX2-RV32D-NEXT: vand.vv v12, v8, v12 ; LMULMAX2-RV32D-NEXT: fsrmi a1, 1 -; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX2-RV32D-NEXT: vfcvt.f.xu.v v12, v12 ; LMULMAX2-RV32D-NEXT: fsrm a1 ; LMULMAX2-RV32D-NEXT: li a1, 52 -; LMULMAX2-RV32D-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX2-RV32D-NEXT: vsrl.vx v12, v12, a1 ; LMULMAX2-RV32D-NEXT: li a1, 1023 -; LMULMAX2-RV32D-NEXT: vsub.vx v8, v8, a1 +; LMULMAX2-RV32D-NEXT: vsub.vx v12, v12, a1 +; LMULMAX2-RV32D-NEXT: vmseq.vv v0, v8, v10 ; LMULMAX2-RV32D-NEXT: li a1, 64 -; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v8, a1, v0 +; LMULMAX2-RV32D-NEXT: vmerge.vxm v8, v12, a1, v0 ; LMULMAX2-RV32D-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV32D-NEXT: ret ; @@ -1258,18 +1258,18 @@ ; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX8-RV32-NEXT: vmv.v.i v10, 0 ; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v10 -; LMULMAX8-RV32-NEXT: vsub.vv v10, v10, v8 -; LMULMAX8-RV32-NEXT: vand.vv v8, v8, v10 +; LMULMAX8-RV32-NEXT: vsub.vv v12, v10, v8 +; LMULMAX8-RV32-NEXT: vand.vv v12, v8, v12 ; LMULMAX8-RV32-NEXT: fsrmi a1, 1 -; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v8, v8 +; LMULMAX8-RV32-NEXT: vfcvt.f.xu.v v12, v12 ; LMULMAX8-RV32-NEXT: fsrm a1 ; LMULMAX8-RV32-NEXT: li a1, 52 -; LMULMAX8-RV32-NEXT: vsrl.vx v8, v8, a1 +; LMULMAX8-RV32-NEXT: vsrl.vx v12, v12, a1 ; LMULMAX8-RV32-NEXT: li a1, 1023 -; LMULMAX8-RV32-NEXT: vsub.vx v8, v8, a1 +; LMULMAX8-RV32-NEXT: vsub.vx v12, v12, a1 +; LMULMAX8-RV32-NEXT: vmseq.vv v0, v8, v10 ; LMULMAX8-RV32-NEXT: li a1, 64 -; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v8, a1, v0 +; LMULMAX8-RV32-NEXT: vmerge.vxm v8, v12, a1, v0 ; LMULMAX8-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX8-RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -48,11 +48,11 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: extractelt_v2i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -255,48 +255,58 @@ ; RV32-V128-NEXT: addi sp, sp, -16 ; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV32-V128-NEXT: lui a0, %hi(.LCPI10_0) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) -; RV32-V128-NEXT: li a1, 32 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-V128-NEXT: vle32.v v0, (a0) -; RV32-V128-NEXT: vmv8r.v v24, v8 -; RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vs8r.v v8, (a0) # 
Unknown-size Folded Spill -; RV32-V128-NEXT: vrgather.vv v8, v24, v0 -; RV32-V128-NEXT: lui a0, %hi(.LCPI10_1) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_1) -; RV32-V128-NEXT: vle32.v v24, (a0) +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-V128-NEXT: lui a0, 699051 -; RV32-V128-NEXT: addi a0, a0, -1366 +; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: addi a0, sp, 16 +; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: li a0, 32 +; RV32-V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-V128-NEXT: lui a1, %hi(.LCPI10_1) +; RV32-V128-NEXT: addi a1, a1, %lo(.LCPI10_1) +; RV32-V128-NEXT: vle32.v v0, (a1) +; RV32-V128-NEXT: lui a1, %hi(.LCPI10_0) +; RV32-V128-NEXT: addi a1, a1, %lo(.LCPI10_0) +; RV32-V128-NEXT: vle32.v v16, (a1) +; RV32-V128-NEXT: csrr a1, vlenb +; RV32-V128-NEXT: slli a1, a1, 4 +; RV32-V128-NEXT: add a1, sp, a1 +; RV32-V128-NEXT: addi a1, a1, 16 +; RV32-V128-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-V128-NEXT: vrgather.vv v16, v8, v0 +; RV32-V128-NEXT: lui a1, 699051 +; RV32-V128-NEXT: addi a1, a1, -1366 ; RV32-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-V128-NEXT: vmv.v.x v0, a0 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-V128-NEXT: vmv.v.x v0, a1 +; RV32-V128-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 ; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vrgather.vv v8, v16, v24, v0.t -; RV32-V128-NEXT: vmv.v.v v24, v8 +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-V128-NEXT: vmv4r.v v24, v8 ; RV32-V128-NEXT: addi a0, sp, 16 ; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 +; RV32-V128-NEXT: vwaddu.vv v0, v8, v24 ; RV32-V128-NEXT: li a0, -1 -; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 +; RV32-V128-NEXT: vwmaccu.vx v0, a0, v24 ; RV32-V128-NEXT: vmv8r.v v8, v0 -; RV32-V128-NEXT: vmv8r.v v16, v24 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 ; RV32-V128-NEXT: add sp, sp, a0 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret @@ -306,48 +316,58 @@ ; RV64-V128-NEXT: addi sp, sp, -16 ; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: li a1, 24 +; RV64-V128-NEXT: mul a0, a0, a1 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-V128-NEXT: lui a0, %hi(.LCPI10_0) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) -; RV64-V128-NEXT: li a1, 32 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV64-V128-NEXT: vle32.v v0, (a0) -; RV64-V128-NEXT: vmv8r.v v24, v8 -; RV64-V128-NEXT: addi a0, 
sp, 16 -; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV64-V128-NEXT: vrgather.vv v8, v24, v0 -; RV64-V128-NEXT: lui a0, %hi(.LCPI10_1) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_1) -; RV64-V128-NEXT: vle32.v v24, (a0) +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV64-V128-NEXT: lui a0, 699051 -; RV64-V128-NEXT: addiw a0, a0, -1366 +; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: addi a0, sp, 16 +; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: li a0, 32 +; RV64-V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV64-V128-NEXT: lui a1, %hi(.LCPI10_1) +; RV64-V128-NEXT: addi a1, a1, %lo(.LCPI10_1) +; RV64-V128-NEXT: vle32.v v0, (a1) +; RV64-V128-NEXT: lui a1, %hi(.LCPI10_0) +; RV64-V128-NEXT: addi a1, a1, %lo(.LCPI10_0) +; RV64-V128-NEXT: vle32.v v16, (a1) +; RV64-V128-NEXT: csrr a1, vlenb +; RV64-V128-NEXT: slli a1, a1, 4 +; RV64-V128-NEXT: add a1, sp, a1 +; RV64-V128-NEXT: addi a1, a1, 16 +; RV64-V128-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-V128-NEXT: vrgather.vv v16, v8, v0 +; RV64-V128-NEXT: lui a1, 699051 +; RV64-V128-NEXT: addiw a1, a1, -1366 ; RV64-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-V128-NEXT: vmv.v.x v0, a0 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV64-V128-NEXT: vmv.v.x v0, a1 +; RV64-V128-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 ; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vrgather.vv v8, v16, v24, v0.t -; RV64-V128-NEXT: vmv.v.v v24, v8 +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-V128-NEXT: vmv4r.v v24, v8 ; RV64-V128-NEXT: addi a0, sp, 16 ; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vwaddu.vv v0, v8, v16 +; RV64-V128-NEXT: vwaddu.vv v0, v8, v24 ; RV64-V128-NEXT: li a0, -1 -; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 +; RV64-V128-NEXT: vwmaccu.vx v0, a0, v24 ; RV64-V128-NEXT: vmv8r.v v8, v0 -; RV64-V128-NEXT: vmv8r.v v16, v24 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: li a1, 24 +; RV64-V128-NEXT: mul a0, a0, a1 ; RV64-V128-NEXT: add sp, sp, a0 ; RV64-V128-NEXT: addi sp, sp, 16 ; RV64-V128-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -138,11 +138,11 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { ; RV32-LABEL: vrgather_shuffle_xv_v4f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; RV32-NEXT: vid.v v12 ; RV32-NEXT: lui a0, %hi(.LCPI7_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0) +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-NEXT: vlse64.v v10, 
(a0), zero +; RV32-NEXT: vid.v v12 ; RV32-NEXT: vrsub.vi v12, v12, 4 ; RV32-NEXT: vmv.v.i v0, 12 ; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -152,12 +152,12 @@ ; ; RV64-LABEL: vrgather_shuffle_xv_v4f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vid.v v10 -; RV64-NEXT: vrsub.vi v12, v10, 4 ; RV64-NEXT: lui a0, %hi(.LCPI7_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0) +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vlse64.v v10, (a0), zero +; RV64-NEXT: vid.v v12 +; RV64-NEXT: vrsub.vi v12, v12, 4 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.i v0, 12 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll @@ -799,40 +799,30 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v24, (a0) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-NEXT: li a0, 63 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vx v8, v24, a0, v0.t -; RV32-NEXT: vsrl.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsll.vi v16, v8, 1, v0.t -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vxor.vv v8, v24, v8, v0.t +; RV32-NEXT: li a0, 63 ; RV32-NEXT: vand.vx v8, v8, a0, v0.t ; RV32-NEXT: vsll.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vx v16, v24, a0, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vv v16, v24, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -845,20 +835,20 @@ ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: sub sp, sp, a2 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV64-NEXT: li a0, 63 ; RV64-NEXT: vsetvli zero, a1, 
e64, m8, ta, ma -; RV64-NEXT: vand.vx v8, v24, a0, v0.t -; RV64-NEXT: vsrl.vv v16, v16, v8, v0.t +; RV64-NEXT: vsll.vi v16, v8, 1, v0.t +; RV64-NEXT: li a0, 63 ; RV64-NEXT: vnot.v v8, v24, v0.t ; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsll.vv v8, v16, v8, v0.t +; RV64-NEXT: vand.vx v16, v24, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsll.vi v24, v24, 1, v0.t -; RV64-NEXT: vsll.vv v8, v24, v8, v0.t +; RV64-NEXT: vsrl.vv v16, v24, v16, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 @@ -876,41 +866,30 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v24, (a0) -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vmv8r.v v16, v8 -; RV32-NEXT: li a0, 63 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vx v8, v24, a0, v0.t -; RV32-NEXT: vsll.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: li a2, 32 -; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v16, 1, v0.t +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v8, -1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vxor.vv v8, v24, v8, v0.t +; RV32-NEXT: li a0, 63 ; RV32-NEXT: vand.vx v8, v8, a0, v0.t ; RV32-NEXT: vsrl.vv v8, v16, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vx v16, v24, a0, v0.t +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsll.vv v16, v24, v16, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -923,22 +902,21 @@ ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: sub sp, sp, a2 ; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV64-NEXT: vmv8r.v v16, v8 -; RV64-NEXT: li a0, 63 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vand.vx v8, v24, a0, v0.t -; RV64-NEXT: vsll.vv v8, v16, v8, v0.t -; RV64-NEXT: vnot.v v16, v24, v0.t -; RV64-NEXT: vand.vx v16, v16, a0, v0.t +; RV64-NEXT: vsrl.vi v16, v16, 1, v0.t +; 
RV64-NEXT: li a0, 63 +; RV64-NEXT: vnot.v v8, v24, v0.t +; RV64-NEXT: vand.vx v8, v8, a0, v0.t +; RV64-NEXT: vsrl.vv v8, v16, v8, v0.t +; RV64-NEXT: vand.vx v16, v24, a0, v0.t ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-NEXT: vsrl.vi v24, v24, 1, v0.t -; RV64-NEXT: vsrl.vv v16, v24, v16, v0.t -; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: vsll.vv v16, v24, v16, v0.t +; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -458,15 +458,14 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vlm.v v8, (a0) -; CHECK-NEXT: vmv1r.v v9, v0 +; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: vmerge.vim v9, v10, 1, v0 -; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, tu, ma ; CHECK-NEXT: vmv.v.v v9, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma ; CHECK-NEXT: vmsne.vi v0, v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -414,48 +414,58 @@ ; RV32-V128-NEXT: addi sp, sp, -16 ; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV32-V128-NEXT: lui a0, %hi(.LCPI17_0) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) -; RV32-V128-NEXT: li a1, 32 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-V128-NEXT: vle32.v v0, (a0) -; RV32-V128-NEXT: vmv8r.v v24, v8 -; RV32-V128-NEXT: addi a0, sp, 16 -; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-V128-NEXT: vrgather.vv v8, v24, v0 -; RV32-V128-NEXT: lui a0, %hi(.LCPI17_1) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) -; RV32-V128-NEXT: vle32.v v24, (a0) +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-V128-NEXT: lui a0, 699051 -; RV32-V128-NEXT: addi a0, a0, -1366 +; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: addi a0, sp, 16 +; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: li a0, 32 +; RV32-V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV32-V128-NEXT: lui a1, %hi(.LCPI17_1) +; RV32-V128-NEXT: addi a1, a1, %lo(.LCPI17_1) +; RV32-V128-NEXT: vle32.v v0, 
(a1) +; RV32-V128-NEXT: lui a1, %hi(.LCPI17_0) +; RV32-V128-NEXT: addi a1, a1, %lo(.LCPI17_0) +; RV32-V128-NEXT: vle32.v v16, (a1) +; RV32-V128-NEXT: csrr a1, vlenb +; RV32-V128-NEXT: slli a1, a1, 4 +; RV32-V128-NEXT: add a1, sp, a1 +; RV32-V128-NEXT: addi a1, a1, 16 +; RV32-V128-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-V128-NEXT: vrgather.vv v16, v8, v0 +; RV32-V128-NEXT: lui a1, 699051 +; RV32-V128-NEXT: addi a1, a1, -1366 ; RV32-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-V128-NEXT: vmv.v.x v0, a0 -; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV32-V128-NEXT: vmv.v.x v0, a1 +; RV32-V128-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 ; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vrgather.vv v8, v16, v24, v0.t -; RV32-V128-NEXT: vmv.v.v v24, v8 +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a0, a0, 3 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-V128-NEXT: vmv4r.v v24, v8 ; RV32-V128-NEXT: addi a0, sp, 16 ; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 +; RV32-V128-NEXT: vwaddu.vv v0, v8, v24 ; RV32-V128-NEXT: li a0, -1 -; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 +; RV32-V128-NEXT: vwmaccu.vx v0, a0, v24 ; RV32-V128-NEXT: vmv8r.v v8, v0 -; RV32-V128-NEXT: vmv8r.v v16, v24 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 ; RV32-V128-NEXT: add sp, sp, a0 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret @@ -465,48 +475,58 @@ ; RV64-V128-NEXT: addi sp, sp, -16 ; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: li a1, 24 +; RV64-V128-NEXT: mul a0, a0, a1 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; RV64-V128-NEXT: lui a0, %hi(.LCPI17_0) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) -; RV64-V128-NEXT: li a1, 32 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV64-V128-NEXT: vle32.v v0, (a0) -; RV64-V128-NEXT: vmv8r.v v24, v8 -; RV64-V128-NEXT: addi a0, sp, 16 -; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV64-V128-NEXT: vrgather.vv v8, v24, v0 -; RV64-V128-NEXT: lui a0, %hi(.LCPI17_1) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) -; RV64-V128-NEXT: vle32.v v24, (a0) +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV64-V128-NEXT: lui a0, 699051 -; RV64-V128-NEXT: addiw a0, a0, -1366 +; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: addi a0, sp, 16 +; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: li a0, 32 +; RV64-V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; RV64-V128-NEXT: lui a1, %hi(.LCPI17_1) +; RV64-V128-NEXT: addi a1, a1, 
%lo(.LCPI17_1) +; RV64-V128-NEXT: vle32.v v0, (a1) +; RV64-V128-NEXT: lui a1, %hi(.LCPI17_0) +; RV64-V128-NEXT: addi a1, a1, %lo(.LCPI17_0) +; RV64-V128-NEXT: vle32.v v16, (a1) +; RV64-V128-NEXT: csrr a1, vlenb +; RV64-V128-NEXT: slli a1, a1, 4 +; RV64-V128-NEXT: add a1, sp, a1 +; RV64-V128-NEXT: addi a1, a1, 16 +; RV64-V128-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-V128-NEXT: vrgather.vv v16, v8, v0 +; RV64-V128-NEXT: lui a1, 699051 +; RV64-V128-NEXT: addiw a1, a1, -1366 ; RV64-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-V128-NEXT: vmv.v.x v0, a0 -; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; RV64-V128-NEXT: vmv.v.x v0, a1 +; RV64-V128-NEXT: vsetvli zero, a0, e32, m8, ta, mu ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 ; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vrgather.vv v8, v16, v24, v0.t -; RV64-V128-NEXT: vmv.v.v v24, v8 +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a0, a0, 3 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV64-V128-NEXT: vmv4r.v v24, v8 ; RV64-V128-NEXT: addi a0, sp, 16 ; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vwaddu.vv v0, v8, v16 +; RV64-V128-NEXT: vwaddu.vv v0, v8, v24 ; RV64-V128-NEXT: li a0, -1 -; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 +; RV64-V128-NEXT: vwmaccu.vx v0, a0, v24 ; RV64-V128-NEXT: vmv8r.v v8, v0 -; RV64-V128-NEXT: vmv8r.v v16, v24 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: li a1, 24 +; RV64-V128-NEXT: mul a0, a0, a1 ; RV64-V128-NEXT: add sp, sp, a0 ; RV64-V128-NEXT: addi sp, sp, 16 ; RV64-V128-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -220,31 +220,32 @@ ; RV32-LABEL: vrgather_shuffle_xv_v8i64: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: lui a0, %hi(.LCPI12_1) +; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) +; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: lui a0, %hi(.LCPI12_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_0) -; RV32-NEXT: vle16.v v16, (a0) +; RV32-NEXT: vle16.v v17, (a0) ; RV32-NEXT: vmv.v.i v20, -1 ; RV32-NEXT: vrgatherei16.vv v12, v20, v16 ; RV32-NEXT: li a0, 113 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 -; RV32-NEXT: lui a0, %hi(.LCPI12_1) -; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vle16.v v16, (a0) -; RV32-NEXT: vrgatherei16.vv v12, v8, v16, v0.t +; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_xv_v8i64: ; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI12_0) +; RV64-NEXT: addi a0, a0, %lo(.LCPI12_0) +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: li a0, 113 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: lui a0, %hi(.LCPI12_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI12_0) -; RV64-NEXT: vle64.v v16, (a0) ; 
RV64-NEXT: vmv.v.i v12, -1 ; RV64-NEXT: vrgather.vv v12, v8, v16, v0.t ; RV64-NEXT: vmv.v.v v8, v12 @@ -256,32 +257,33 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) { ; RV32-LABEL: vrgather_shuffle_vx_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI13_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0) ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: lui a0, %hi(.LCPI13_1) +; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1) ; RV32-NEXT: vle16.v v16, (a0) +; RV32-NEXT: lui a0, %hi(.LCPI13_0) +; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0) +; RV32-NEXT: vle16.v v17, (a0) ; RV32-NEXT: vrgatherei16.vv v12, v8, v16 ; RV32-NEXT: li a0, 140 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: lui a0, %hi(.LCPI13_1) -; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1) -; RV32-NEXT: vle16.v v8, (a0) -; RV32-NEXT: vmv.v.i v16, 5 -; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t +; RV32-NEXT: vmv.v.i v8, 5 +; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_vx_v8i64: ; RV64: # %bb.0: +; RV64-NEXT: lui a0, %hi(.LCPI13_0) +; RV64-NEXT: addi a0, a0, %lo(.LCPI13_0) +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: li a0, 115 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: lui a0, %hi(.LCPI13_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI13_0) -; RV64-NEXT: vle64.v v16, (a0) ; RV64-NEXT: vmv.v.i v12, 5 ; RV64-NEXT: vrgather.vv v12, v8, v16, v0.t ; RV64-NEXT: vmv.v.v v8, v12 @@ -763,16 +765,16 @@ define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: unmergable: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vadd.vi v11, v10, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI46_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; CHECK-NEXT: vle8.v v11, (a0) +; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vadd.vi v12, v10, 2 ; CHECK-NEXT: li a0, 234 ; CHECK-NEXT: vmv.v.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v11 -; CHECK-NEXT: vrgather.vv v10, v9, v12, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -780,19 +780,19 @@ ; CHECK-LABEL: sdiv_v6i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vdiv.vv v10, v8, v9 ; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v8, 4 -; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vslidedown.vi v9, v9, 4 +; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vdiv.vv v10, v11, v10 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vdiv.vv v8, v9, v8 +; CHECK-NEXT: vdiv.vv v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vslideup.vi 
v10, v8, 4 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: vse16.v v10, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y @@ -869,19 +869,19 @@ ; CHECK-LABEL: srem_v6i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vrem.vv v10, v8, v9 ; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v8, 4 -; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vslidedown.vi v9, v9, 4 +; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vrem.vv v10, v11, v10 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vrem.vv v8, v9, v8 +; CHECK-NEXT: vrem.vv v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vslideup.vi v10, v8, 4 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: vse16.v v10, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y @@ -958,19 +958,19 @@ ; CHECK-LABEL: udiv_v6i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vdivu.vv v10, v8, v9 ; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v8, 4 -; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vslidedown.vi v9, v9, 4 +; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vdivu.vv v10, v11, v10 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vdivu.vv v8, v9, v8 +; CHECK-NEXT: vdivu.vv v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vslideup.vi v10, v8, 4 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: vse16.v v10, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y @@ -1047,19 +1047,19 @@ ; CHECK-LABEL: urem_v6i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vremu.vv v10, v8, v9 ; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v8, 4 -; CHECK-NEXT: vslidedown.vi v11, v9, 4 +; CHECK-NEXT: vslidedown.vi v9, v9, 4 +; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vremu.vv v10, v11, v10 -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vremu.vv v8, v9, v8 +; CHECK-NEXT: vremu.vv v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vslideup.vi v10, v8, 4 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) +; CHECK-NEXT: vse16.v v10, (a0) ; CHECK-NEXT: ret %a = load <6 x i16>, ptr %x %b = load <6 x i16>, ptr %y @@ -1105,45 +1105,45 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: li a1, 513 -; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; 
RV32-NEXT: vmv.v.x v0, a1 -; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 4 -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 ; RV32-NEXT: lui a1, 1 -; RV32-NEXT: addi a2, a1, 78 +; RV32-NEXT: addi a2, a1, 32 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 3, v0 -; RV32-NEXT: lui a2, 8 -; RV32-NEXT: addi a2, a2, 304 +; RV32-NEXT: lui a2, %hi(.LCPI65_0) +; RV32-NEXT: addi a2, a2, %lo(.LCPI65_0) +; RV32-NEXT: vle8.v v9, (a2) +; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: vmerge.vim v11, v10, 1, v0 +; RV32-NEXT: vsrl.vv v11, v8, v11 +; RV32-NEXT: vmulhu.vv v9, v11, v9 +; RV32-NEXT: vsub.vv v8, v8, v9 +; RV32-NEXT: lui a2, 3 +; RV32-NEXT: addi a2, a2, -2044 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a2 +; RV32-NEXT: li a2, -128 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 2, v0 -; RV32-NEXT: lui a2, 3 -; RV32-NEXT: addi a2, a2, -2044 +; RV32-NEXT: vmerge.vxm v10, v10, a2, v0 +; RV32-NEXT: vmulhu.vv v8, v8, v10 +; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: li a2, 513 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: li a2, -128 -; RV32-NEXT: vmerge.vxm v11, v10, a2, v0 -; RV32-NEXT: addi a1, a1, 32 +; RV32-NEXT: vmv.v.i v9, 4 +; RV32-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-NEXT: addi a1, a1, 78 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a1 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: lui a1, %hi(.LCPI65_0) -; RV32-NEXT: addi a1, a1, %lo(.LCPI65_0) -; RV32-NEXT: vle8.v v12, (a1) -; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsrl.vv v10, v8, v10 -; RV32-NEXT: vmulhu.vv v10, v10, v12 -; RV32-NEXT: vsub.vv v8, v8, v10 -; RV32-NEXT: vmulhu.vv v8, v8, v11 -; RV32-NEXT: vadd.vv v8, v8, v10 +; RV32-NEXT: vmerge.vim v9, v9, 3, v0 +; RV32-NEXT: lui a1, 8 +; RV32-NEXT: addi a1, a1, 304 +; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV32-NEXT: vmv.v.x v0, a1 +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vmerge.vim v9, v9, 2, v0 ; RV32-NEXT: vsrl.vv v8, v8, v9 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret @@ -1152,45 +1152,45 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: li a1, 513 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.v.x v0, a1 -; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, 4 -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 ; RV64-NEXT: lui a1, 1 -; RV64-NEXT: addiw a2, a1, 78 +; RV64-NEXT: addiw a2, a1, 32 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 3, v0 -; RV64-NEXT: lui a2, 8 -; RV64-NEXT: addiw a2, a2, 304 +; RV64-NEXT: lui a2, %hi(.LCPI65_0) +; RV64-NEXT: addi a2, a2, %lo(.LCPI65_0) +; RV64-NEXT: vle8.v v9, (a2) +; RV64-NEXT: vmv.v.i v10, 0 +; RV64-NEXT: vmerge.vim v11, v10, 1, v0 +; RV64-NEXT: vsrl.vv v11, v8, v11 +; RV64-NEXT: vmulhu.vv v9, v11, v9 +; RV64-NEXT: vsub.vv v8, v8, v9 +; RV64-NEXT: lui a2, 3 +; RV64-NEXT: addiw a2, a2, -2044 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a2 +; RV64-NEXT: li a2, -128 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 2, v0 -; RV64-NEXT: lui a2, 3 -; RV64-NEXT: addiw a2, a2, -2044 +; RV64-NEXT: vmerge.vxm v10, 
v10, a2, v0 +; RV64-NEXT: vmulhu.vv v8, v8, v10 +; RV64-NEXT: vadd.vv v8, v8, v9 +; RV64-NEXT: li a2, 513 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: li a2, -128 -; RV64-NEXT: vmerge.vxm v11, v10, a2, v0 -; RV64-NEXT: addiw a1, a1, 32 +; RV64-NEXT: vmv.v.i v9, 4 +; RV64-NEXT: vmerge.vim v9, v9, 1, v0 +; RV64-NEXT: addiw a1, a1, 78 +; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64-NEXT: vmv.v.x v0, a1 +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vmerge.vim v9, v9, 3, v0 +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: addiw a1, a1, 304 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: lui a1, %hi(.LCPI65_0) -; RV64-NEXT: addi a1, a1, %lo(.LCPI65_0) -; RV64-NEXT: vle8.v v12, (a1) -; RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-NEXT: vsrl.vv v10, v8, v10 -; RV64-NEXT: vmulhu.vv v10, v10, v12 -; RV64-NEXT: vsub.vv v8, v8, v10 -; RV64-NEXT: vmulhu.vv v8, v8, v11 -; RV64-NEXT: vadd.vv v8, v8, v10 +; RV64-NEXT: vmerge.vim v9, v9, 2, v0 ; RV64-NEXT: vsrl.vv v8, v8, v9 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret @@ -1205,32 +1205,32 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: lui a1, 1048568 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma +; CHECK-NEXT: vmv.v.i v9, 1 ; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vmv.s.x v10, a1 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vmv.v.i v11, 1 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v11, 6 +; CHECK-NEXT: vmv1r.v v11, v10 +; CHECK-NEXT: vslideup.vi v11, v9, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: lui a1, %hi(.LCPI66_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) ; CHECK-NEXT: vle16.v v12, (a1) -; CHECK-NEXT: vsrl.vv v9, v8, v9 -; CHECK-NEXT: vmulhu.vv v9, v9, v12 -; CHECK-NEXT: vsub.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vv v11, v8, v11 +; CHECK-NEXT: vmulhu.vv v11, v11, v12 +; CHECK-NEXT: vsub.vv v8, v8, v11 +; CHECK-NEXT: lui a1, 1048568 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma +; CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vmulhu.vv v8, v8, v10 -; CHECK-NEXT: vadd.vv v8, v8, v9 +; CHECK-NEXT: vadd.vv v8, v8, v11 ; CHECK-NEXT: li a1, 33 ; CHECK-NEXT: vmv.v.x v0, a1 -; CHECK-NEXT: vmv.v.i v9, 3 -; CHECK-NEXT: vmerge.vim v9, v9, 2, v0 +; CHECK-NEXT: vmv.v.i v10, 3 +; CHECK-NEXT: vmerge.vim v10, v10, 2, v0 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma -; CHECK-NEXT: vslideup.vi v9, v11, 6 +; CHECK-NEXT: vslideup.vi v10, v9, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vsrl.vv v8, v8, v10 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x i16>, ptr %x @@ -1271,18 +1271,18 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: vmv.s.x v9, a1 -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; CHECK-NEXT: vslideup.vi v10, v9, 2 ; CHECK-NEXT: lui a1, %hi(.LCPI68_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI68_0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a1) ; CHECK-NEXT: vmulhu.vv v9, v8, v9 ; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: vmulhu.vv v8, v8, v10 +; CHECK-NEXT: lui a1, 524288 +; 
CHECK-NEXT: vmv.s.x v10, a1 +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; CHECK-NEXT: vslideup.vi v11, v10, 2 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vmulhu.vv v8, v8, v11 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vmv.v.i v9, 1 ; CHECK-NEXT: vmv.v.i v10, 2 @@ -1524,16 +1524,16 @@ ; RV32-NEXT: vrsub.vi v10, v10, 0 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vmadd.vv v10, v8, v9 +; RV32-NEXT: li a1, 63 +; RV32-NEXT: vsrl.vx v8, v10, a1 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v8, 1 -; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: vmv.v.i v9, 1 +; RV32-NEXT: vmv.v.i v11, 0 ; RV32-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; RV32-NEXT: vslideup.vi v9, v8, 2 +; RV32-NEXT: vslideup.vi v11, v9, 2 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsra.vv v8, v10, v9 -; RV32-NEXT: li a1, 63 -; RV32-NEXT: vsrl.vx v9, v10, a1 -; RV32-NEXT: vadd.vv v8, v8, v9 +; RV32-NEXT: vsra.vv v9, v10, v11 +; RV32-NEXT: vadd.vv v8, v9, v8 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; @@ -4954,13 +4954,6 @@ ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 0 -; LMULMAX2-RV32-NEXT: lui a2, 163907 -; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 -; LMULMAX2-RV32-NEXT: li a2, -128 -; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v10, a2, v0 ; LMULMAX2-RV32-NEXT: lui a2, 66049 ; LMULMAX2-RV32-NEXT: addi a2, a2, 32 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -4968,13 +4961,20 @@ ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: lui a2, %hi(.LCPI181_0) ; LMULMAX2-RV32-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV32-NEXT: vle8.v v14, (a2) -; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV32-NEXT: vsrl.vv v10, v8, v10 -; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v14 -; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV32-NEXT: vle8.v v12, (a2) +; LMULMAX2-RV32-NEXT: vmerge.vim v14, v10, 1, v0 +; LMULMAX2-RV32-NEXT: vsrl.vv v14, v8, v14 +; LMULMAX2-RV32-NEXT: vmulhu.vv v12, v14, v12 +; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v12 +; LMULMAX2-RV32-NEXT: lui a2, 163907 +; LMULMAX2-RV32-NEXT: addi a2, a2, -2044 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 +; LMULMAX2-RV32-NEXT: li a2, -128 +; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v10 +; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV32-NEXT: lui a2, 8208 ; LMULMAX2-RV32-NEXT: addi a2, a2, 513 @@ -5004,13 +5004,6 @@ ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) ; LMULMAX2-RV64-NEXT: vmv.v.i v10, 0 -; LMULMAX2-RV64-NEXT: lui a2, 163907 -; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 -; LMULMAX2-RV64-NEXT: li a2, -128 -; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v10, a2, v0 ; LMULMAX2-RV64-NEXT: lui a2, 66049 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 32 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -5018,13 +5011,20 @@ ; 
LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI181_0) ; LMULMAX2-RV64-NEXT: addi a2, a2, %lo(.LCPI181_0) -; LMULMAX2-RV64-NEXT: vle8.v v14, (a2) -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV64-NEXT: vsrl.vv v10, v8, v10 -; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v14 -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: vle8.v v12, (a2) +; LMULMAX2-RV64-NEXT: vmerge.vim v14, v10, 1, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v14, v8, v14 +; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v14, v12 +; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v12 +; LMULMAX2-RV64-NEXT: lui a2, 163907 +; LMULMAX2-RV64-NEXT: addiw a2, a2, -2044 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 +; LMULMAX2-RV64-NEXT: li a2, -128 +; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vmv.v.i v10, 4 ; LMULMAX2-RV64-NEXT: lui a2, 8208 ; LMULMAX2-RV64-NEXT: addiw a2, a2, 513 @@ -5073,38 +5073,38 @@ ; LMULMAX2-RV32: # %bb.0: ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle16.v v10, (a0) -; LMULMAX2-RV32-NEXT: lui a1, 2 -; LMULMAX2-RV32-NEXT: addi a1, a1, 289 -; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v8, 3 -; LMULMAX2-RV32-NEXT: vmerge.vim v12, v8, 2, v0 ; LMULMAX2-RV32-NEXT: lui a1, 4 ; LMULMAX2-RV32-NEXT: addi a1, a1, 64 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v8, a1 ; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI182_0) +; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) +; LMULMAX2-RV32-NEXT: vle16.v v12, (a1) +; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 ; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 1, v0 +; LMULMAX2-RV32-NEXT: vmerge.vim v16, v14, 1, v0 +; LMULMAX2-RV32-NEXT: vsrl.vv v16, v10, v16 +; LMULMAX2-RV32-NEXT: vmulhu.vv v12, v16, v12 +; LMULMAX2-RV32-NEXT: vsub.vv v10, v10, v12 ; LMULMAX2-RV32-NEXT: li a1, 257 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 -; LMULMAX2-RV32-NEXT: lui a1, %hi(.LCPI182_0) -; LMULMAX2-RV32-NEXT: addi a1, a1, %lo(.LCPI182_0) -; LMULMAX2-RV32-NEXT: vle16.v v16, (a1) ; LMULMAX2-RV32-NEXT: lui a1, 1048568 -; LMULMAX2-RV32-NEXT: vmerge.vxm v18, v14, a1, v0 +; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmerge.vxm v14, v14, a1, v0 +; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v14 +; LMULMAX2-RV32-NEXT: vadd.vv v10, v10, v12 +; LMULMAX2-RV32-NEXT: lui a1, 2 +; LMULMAX2-RV32-NEXT: addi a1, a1, 289 +; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmv.v.i v12, 3 +; LMULMAX2-RV32-NEXT: vmerge.vim v12, v12, 2, v0 ; LMULMAX2-RV32-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV32-NEXT: vmerge.vim v8, v14, 1, v0 +; LMULMAX2-RV32-NEXT: vmerge.vim v8, v12, 1, v0 ; LMULMAX2-RV32-NEXT: vsrl.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v16 -; LMULMAX2-RV32-NEXT: 
vsub.vv v10, v10, v8 -; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v10, v18 -; LMULMAX2-RV32-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret ; @@ -5112,38 +5112,38 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle16.v v10, (a0) -; LMULMAX2-RV64-NEXT: lui a1, 2 -; LMULMAX2-RV64-NEXT: addiw a1, a1, 289 -; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.i v8, 3 -; LMULMAX2-RV64-NEXT: vmerge.vim v12, v8, 2, v0 ; LMULMAX2-RV64-NEXT: lui a1, 4 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 64 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.x v8, a1 ; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI182_0) +; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) +; LMULMAX2-RV64-NEXT: vle16.v v12, (a1) +; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0 ; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 1, v0 +; LMULMAX2-RV64-NEXT: vmerge.vim v16, v14, 1, v0 +; LMULMAX2-RV64-NEXT: vsrl.vv v16, v10, v16 +; LMULMAX2-RV64-NEXT: vmulhu.vv v12, v16, v12 +; LMULMAX2-RV64-NEXT: vsub.vv v10, v10, v12 ; LMULMAX2-RV64-NEXT: li a1, 257 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 -; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI182_0) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI182_0) -; LMULMAX2-RV64-NEXT: vle16.v v16, (a1) ; LMULMAX2-RV64-NEXT: lui a1, 1048568 -; LMULMAX2-RV64-NEXT: vmerge.vxm v18, v14, a1, v0 +; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmerge.vxm v14, v14, a1, v0 +; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v14 +; LMULMAX2-RV64-NEXT: vadd.vv v10, v10, v12 +; LMULMAX2-RV64-NEXT: lui a1, 2 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 289 +; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.x v0, a1 +; LMULMAX2-RV64-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmv.v.i v12, 3 +; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 2, v0 ; LMULMAX2-RV64-NEXT: vmv1r.v v0, v8 -; LMULMAX2-RV64-NEXT: vmerge.vim v8, v14, 1, v0 +; LMULMAX2-RV64-NEXT: vmerge.vim v8, v12, 1, v0 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v16 -; LMULMAX2-RV64-NEXT: vsub.vv v10, v10, v8 -; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v10, v18 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vse16.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -5172,18 +5172,18 @@ ; LMULMAX2: # %bb.0: ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v8, (a0) +; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0) +; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0) +; LMULMAX2-NEXT: vle32.v v10, (a1) +; LMULMAX2-NEXT: vmulhu.vv v10, v8, v10 +; LMULMAX2-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-NEXT: li a1, 68 ; LMULMAX2-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX2-NEXT: vmv.v.x v0, a1 ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; LMULMAX2-NEXT: lui a1, %hi(.LCPI183_0) -; LMULMAX2-NEXT: addi a1, a1, %lo(.LCPI183_0) -; LMULMAX2-NEXT: vle32.v v10, (a1) ; LMULMAX2-NEXT: vmv.v.i v12, 0 ; LMULMAX2-NEXT: lui a1, 524288 ; LMULMAX2-NEXT: vmerge.vxm v12, v12, a1, v0 -; LMULMAX2-NEXT: vmulhu.vv v10, 
v8, v10 -; LMULMAX2-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-NEXT: vadd.vv v8, v8, v10 ; LMULMAX2-NEXT: li a1, 136 @@ -5199,33 +5199,33 @@ ; LMULMAX1-RV32-LABEL: mulhu_v8i32: ; LMULMAX1-RV32: # %bb.0: ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 -; LMULMAX1-RV32-NEXT: vle32.v v9, (a1) -; LMULMAX1-RV32-NEXT: lui a2, 524288 -; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2 -; LMULMAX1-RV32-NEXT: vmv.v.i v11, 0 -; LMULMAX1-RV32-NEXT: vsetivli zero, 3, e32, m1, tu, ma -; LMULMAX1-RV32-NEXT: vslideup.vi v11, v10, 2 +; LMULMAX1-RV32-NEXT: vle32.v v8, (a1) ; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI183_0) ; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI183_0) +; LMULMAX1-RV32-NEXT: vle32.v v9, (a2) +; LMULMAX1-RV32-NEXT: vle32.v v10, (a0) +; LMULMAX1-RV32-NEXT: vmulhu.vv v11, v8, v9 +; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v11 +; LMULMAX1-RV32-NEXT: lui a2, 524288 +; LMULMAX1-RV32-NEXT: vmv.s.x v12, a2 +; LMULMAX1-RV32-NEXT: vmv.v.i v13, 0 +; LMULMAX1-RV32-NEXT: vsetivli zero, 3, e32, m1, tu, ma +; LMULMAX1-RV32-NEXT: vslideup.vi v13, v12, 2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) -; LMULMAX1-RV32-NEXT: vmulhu.vv v12, v9, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v9, v9, v12 -; LMULMAX1-RV32-NEXT: vmulhu.vv v9, v9, v11 -; LMULMAX1-RV32-NEXT: vadd.vv v9, v9, v12 -; LMULMAX1-RV32-NEXT: vmv.v.i v12, 1 -; LMULMAX1-RV32-NEXT: vmv.v.i v13, 2 -; LMULMAX1-RV32-NEXT: vslideup.vi v13, v12, 3 -; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v13 -; LMULMAX1-RV32-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX1-RV32-NEXT: vsub.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vmulhu.vv v8, v8, v11 -; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v10 -; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v13 -; LMULMAX1-RV32-NEXT: vse32.v v8, (a0) -; LMULMAX1-RV32-NEXT: vse32.v v9, (a1) +; LMULMAX1-RV32-NEXT: vmulhu.vv v8, v8, v13 +; LMULMAX1-RV32-NEXT: vadd.vv v8, v8, v11 +; LMULMAX1-RV32-NEXT: vmv.v.i v11, 1 +; LMULMAX1-RV32-NEXT: vmv.v.i v12, 2 +; LMULMAX1-RV32-NEXT: vslideup.vi v12, v11, 3 +; LMULMAX1-RV32-NEXT: vsrl.vv v8, v8, v12 +; LMULMAX1-RV32-NEXT: vmulhu.vv v9, v10, v9 +; LMULMAX1-RV32-NEXT: vsub.vv v10, v10, v9 +; LMULMAX1-RV32-NEXT: vmulhu.vv v10, v10, v13 +; LMULMAX1-RV32-NEXT: vadd.vv v9, v10, v9 +; LMULMAX1-RV32-NEXT: vsrl.vv v9, v9, v12 +; LMULMAX1-RV32-NEXT: vse32.v v9, (a0) +; LMULMAX1-RV32-NEXT: vse32.v v8, (a1) ; LMULMAX1-RV32-NEXT: ret ; ; LMULMAX1-RV64-LABEL: mulhu_v8i32: @@ -5282,24 +5282,24 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: li a1, -1 -; LMULMAX2-RV64-NEXT: slli a1, a1, 63 -; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1 -; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 -; LMULMAX2-RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma -; LMULMAX2-RV64-NEXT: vslideup.vi v12, v10, 2 ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI184_0) ; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v10, (a1) ; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v8, v10 +; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: li a1, -1 +; LMULMAX2-RV64-NEXT: slli a1, a1, 63 +; LMULMAX2-RV64-NEXT: vmv.s.x v12, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v14, 0 +; LMULMAX2-RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma +; LMULMAX2-RV64-NEXT: vslideup.vi v14, v12, 2 +; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI184_1) ; 
LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI184_1) -; LMULMAX2-RV64-NEXT: vle64.v v14, (a1) -; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 +; LMULMAX2-RV64-NEXT: vle64.v v12, (a1) +; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v14 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v14 +; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -5329,46 +5329,46 @@ ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV64-NEXT: vmv.v.i v10, 0 -; LMULMAX1-RV64-NEXT: li a2, -1 -; LMULMAX1-RV64-NEXT: slli a2, a2, 63 +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI184_0) +; LMULMAX1-RV64-NEXT: addi a1, a1, %lo(.LCPI184_0) +; LMULMAX1-RV64-NEXT: vlse64.v v9, (a1), zero +; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI184_1) +; LMULMAX1-RV64-NEXT: ld a1, %lo(.LCPI184_1)(a1) +; LMULMAX1-RV64-NEXT: addi a2, a0, 16 +; LMULMAX1-RV64-NEXT: vle64.v v10, (a2) ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_0) -; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI184_0) +; LMULMAX1-RV64-NEXT: vmv.s.x v9, a1 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vlse64.v v11, (a2), zero -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI184_1) -; LMULMAX1-RV64-NEXT: ld a2, %lo(.LCPI184_1)(a2) +; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v10, v9 +; LMULMAX1-RV64-NEXT: vsub.vv v10, v10, v9 +; LMULMAX1-RV64-NEXT: vmv.v.i v11, 0 +; LMULMAX1-RV64-NEXT: li a1, -1 +; LMULMAX1-RV64-NEXT: slli a1, a1, 63 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v11, a1 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; LMULMAX1-RV64-NEXT: vmulhu.vv v11, v9, v11 -; LMULMAX1-RV64-NEXT: vsub.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: vmulhu.vv v9, v9, v10 -; LMULMAX1-RV64-NEXT: vadd.vv v9, v9, v11 +; LMULMAX1-RV64-NEXT: vmulhu.vv v10, v10, v11 +; LMULMAX1-RV64-NEXT: vadd.vv v9, v10, v9 ; LMULMAX1-RV64-NEXT: vid.v v10 ; LMULMAX1-RV64-NEXT: vadd.vi v11, v10, 2 ; LMULMAX1-RV64-NEXT: vsrl.vv v9, v9, v11 -; LMULMAX1-RV64-NEXT: lui a2, 838861 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -819 -; LMULMAX1-RV64-NEXT: slli a3, a2, 32 -; LMULMAX1-RV64-NEXT: add a2, a2, a3 -; LMULMAX1-RV64-NEXT: vmv.v.x v11, a2 -; LMULMAX1-RV64-NEXT: lui a2, 699051 -; LMULMAX1-RV64-NEXT: addiw a2, a2, -1365 -; LMULMAX1-RV64-NEXT: slli a3, a2, 32 -; LMULMAX1-RV64-NEXT: add a2, a2, a3 +; LMULMAX1-RV64-NEXT: lui a1, 838861 +; LMULMAX1-RV64-NEXT: addiw a1, a1, -819 +; LMULMAX1-RV64-NEXT: slli a3, a1, 32 +; LMULMAX1-RV64-NEXT: add a1, a1, a3 +; LMULMAX1-RV64-NEXT: vmv.v.x v11, a1 +; LMULMAX1-RV64-NEXT: lui a1, 699051 +; LMULMAX1-RV64-NEXT: addiw a1, a1, -1365 +; LMULMAX1-RV64-NEXT: slli a3, a1, 32 +; LMULMAX1-RV64-NEXT: add a1, a1, a3 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; LMULMAX1-RV64-NEXT: vmv.s.x v11, a2 +; LMULMAX1-RV64-NEXT: vmv.s.x v11, a1 ; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; LMULMAX1-RV64-NEXT: vmulhu.vv v8, v8, v11 ; LMULMAX1-RV64-NEXT: vadd.vi v10, v10, 1 ; LMULMAX1-RV64-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX1-RV64-NEXT: vse64.v v8, (a0) -; LMULMAX1-RV64-NEXT: vse64.v v9, (a1) +; LMULMAX1-RV64-NEXT: vse64.v v9, (a2) ; LMULMAX1-RV64-NEXT: ret %a = load <4 x i64>, ptr %x %b = udiv <4 x i64> 
%a, @@ -5382,18 +5382,18 @@ ; LMULMAX2-RV32-NEXT: li a1, 32 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV32-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7 +; LMULMAX2-RV32-NEXT: li a2, -123 +; LMULMAX2-RV32-NEXT: vmv.v.x v10, a2 ; LMULMAX2-RV32-NEXT: lui a2, 304453 ; LMULMAX2-RV32-NEXT: addi a2, a2, -1452 ; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.x v0, a2 +; LMULMAX2-RV32-NEXT: li a2, 57 ; LMULMAX2-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-RV32-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v10 +; LMULMAX2-RV32-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV32-NEXT: li a1, -123 -; LMULMAX2-RV32-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV32-NEXT: li a1, 57 -; LMULMAX2-RV32-NEXT: vmerge.vxm v12, v12, a1, v0 -; LMULMAX2-RV32-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV32-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV32-NEXT: ret @@ -5403,18 +5403,18 @@ ; LMULMAX2-RV64-NEXT: li a1, 32 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle8.v v8, (a0) -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7 +; LMULMAX2-RV64-NEXT: li a2, -123 +; LMULMAX2-RV64-NEXT: vmv.v.x v10, a2 ; LMULMAX2-RV64-NEXT: lui a2, 304453 ; LMULMAX2-RV64-NEXT: addiw a2, a2, -1452 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.x v0, a2 +; LMULMAX2-RV64-NEXT: li a2, 57 ; LMULMAX2-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmerge.vxm v10, v10, a2, v0 +; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: vmv.v.i v10, 7 ; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; LMULMAX2-RV64-NEXT: li a1, -123 -; LMULMAX2-RV64-NEXT: vmv.v.x v12, a1 -; LMULMAX2-RV64-NEXT: li a1, 57 -; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a1, v0 -; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vse8.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret @@ -5657,27 +5657,27 @@ ; LMULMAX2-RV64: # %bb.0: ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) -; LMULMAX2-RV64-NEXT: vmv.v.i v10, 1 +; LMULMAX2-RV64-NEXT: vmv.v.i v10, -1 ; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.i v0, 5 -; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 ; LMULMAX2-RV64-NEXT: lui a1, 349525 ; LMULMAX2-RV64-NEXT: addiw a1, a1, 1365 ; LMULMAX2-RV64-NEXT: slli a2, a1, 32 ; LMULMAX2-RV64-NEXT: add a1, a1, a2 ; LMULMAX2-RV64-NEXT: lui a2, %hi(.LCPI188_0) ; LMULMAX2-RV64-NEXT: ld a2, %lo(.LCPI188_0)(a2) -; LMULMAX2-RV64-NEXT: vmv.v.i v12, -1 -; LMULMAX2-RV64-NEXT: vmerge.vim v12, v12, 0, v0 -; LMULMAX2-RV64-NEXT: vmv.v.x v14, a1 -; LMULMAX2-RV64-NEXT: vmerge.vxm v14, v14, a2, v0 -; LMULMAX2-RV64-NEXT: vmulh.vv v14, v8, v14 -; LMULMAX2-RV64-NEXT: vmacc.vv v14, v8, v12 -; LMULMAX2-RV64-NEXT: vsra.vv v8, v14, v10 +; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 +; LMULMAX2-RV64-NEXT: vmv.v.x v12, a1 +; LMULMAX2-RV64-NEXT: vmerge.vxm v12, v12, a2, v0 +; LMULMAX2-RV64-NEXT: vmulh.vv v12, v8, v12 +; LMULMAX2-RV64-NEXT: vmacc.vv v12, v8, v10 ; LMULMAX2-RV64-NEXT: li a1, 63 -; LMULMAX2-RV64-NEXT: vsrl.vx v10, v14, a1 -; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 +; LMULMAX2-RV64-NEXT: vsrl.vx v8, v12, a1 +; LMULMAX2-RV64-NEXT: vmv.v.i v10, 1 +; LMULMAX2-RV64-NEXT: vmerge.vim v10, v10, 0, v0 +; 
LMULMAX2-RV64-NEXT: vsra.vv v10, v12, v10 +; LMULMAX2-RV64-NEXT: vadd.vv v8, v10, v8 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -5723,7 +5723,7 @@ ; LMULMAX1-RV64-NEXT: vsra.vv v11, v11, v12 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v11, v9 ; LMULMAX1-RV64-NEXT: vmulh.vv v10, v8, v10 -; LMULMAX1-RV64-NEXT: vmacc.vv v10, v8, v13 +; LMULMAX1-RV64-NEXT: vmacc.vv v10, v13, v8 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v10, a2 ; LMULMAX1-RV64-NEXT: vsra.vv v10, v10, v12 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -660,62 +660,73 @@ ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 ; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd6, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 86 * vlenb -; RV64-NEXT: addi a2, a1, 256 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v8, (a2) +; RV64-NEXT: addi a2, a1, 256 +; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 +; RV64-NEXT: li a3, 53 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: addi a2, a1, 128 -; RV64-NEXT: vle64.v v24, (a2) +; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 +; RV64-NEXT: li a3, 77 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v0, (a1) +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 69 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vrgather.vi v8, v16, 4 +; RV64-NEXT: li a1, 128 +; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vmv.v.x v1, a1 +; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma +; RV64-NEXT: vslidedown.vi v24, v16, 8 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vid.v v8 +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t +; RV64-NEXT: vmv.v.v v4, v8 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: li a1, 6 -; RV64-NEXT: vmul.vx v8, v8, a1 -; RV64-NEXT: li a1, 56 -; RV64-NEXT: vrgather.vv v16, v0, v8 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 69 -; RV64-NEXT: mul a2, a2, a3 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v8, v8, -16 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.v.x v0, a1 +; RV64-NEXT: vid.v v16 +; RV64-NEXT: 
vmul.vx v24, v16, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: li a1, 128 +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: li a1, 56 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 +; RV64-NEXT: li a3, 69 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v8, 4 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; RV64-NEXT: vrgather.vv v16, v8, v24 +; RV64-NEXT: vadd.vi v24, v24, -16 +; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 37 @@ -723,61 +734,60 @@ ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v8, 8 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v20, v24, 2, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v20, v16 +; RV64-NEXT: vmv.v.v v4, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 +; RV64-NEXT: li a2, 25 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v8, v24, 1 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v12, v8, 5 +; RV64-NEXT: vmv1r.v v0, v1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v0, v8 -; RV64-NEXT: vadd.vi v8, v24, -15 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v12, v24, 3, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t -; RV64-NEXT: vsetivli zero, 8, e64, m4, 
ta, mu +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v0, v8, 1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v24, 5 +; RV64-NEXT: vrgather.vv v16, v24, v0 +; RV64-NEXT: vadd.vi v24, v8, -15 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 @@ -785,16 +795,22 @@ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v8, v24, 3, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 41 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 25 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -802,8 +818,8 @@ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vmv.v.i v8, 6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill @@ -818,35 +834,41 @@ ; RV64-NEXT: vslideup.vi v8, v12, 5 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v12, v16, v8 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 45 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v16, v8, 4, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v12, v16, 4, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vadd.vi v8, v0, 2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -866,28 +888,28 @@ ; RV64-NEXT: 
vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 4 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v16, v8, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 37 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -912,15 +934,15 @@ ; RV64-NEXT: vslideup.vi v8, v12, 5 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v20, v16, v8 +; RV64-NEXT: vrgather.vv v12, v16, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 37 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload @@ -929,24 +951,24 @@ ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v8, 5, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v12, v16, 5, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 37 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vadd.vi v24, v0, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -956,31 +978,31 @@ ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v24, v0, -13 +; RV64-NEXT: vadd.vi v16, v0, -13 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, 
vlenb ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 37 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmv.v.v v12, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 37 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a2, a1, 5 +; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill @@ -992,8 +1014,8 @@ ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload @@ -1001,7 +1023,7 @@ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: li a1, 192 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 +; RV64-NEXT: li a3, 53 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -1024,53 +1046,59 @@ ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgather.vv v20, v8, v16, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vadd.vi v24, v0, 4 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgather.vv v8, v16, v24 ; RV64-NEXT: li a1, 28 -; RV64-NEXT: vadd.vi v24, v0, -12 +; RV64-NEXT: vadd.vi v16, v0, -12 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, 
sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmv.v.v v12, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 29 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill @@ -1086,16 +1114,16 @@ ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v16, v8, 6 +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v24, v8, 6 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v8, 3 +; RV64-NEXT: vrgather.vi v16, v8, 3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 3 ; RV64-NEXT: add a1, a2, a1 @@ -1108,65 +1136,63 @@ ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v20, v8, v16, v0.t +; RV64-NEXT: vrgather.vv v16, v8, v24, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 4 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vadd.vi v24, v8, 5 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vrgather.vv v8, v0, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 61 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vadd.vi v8, v24, -11 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 77 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 69 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 53 ; RV64-NEXT: mul a1, a1, 
a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -1182,7 +1208,7 @@ ; RV64-NEXT: vmv.v.v v12, v16 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 +; RV64-NEXT: li a2, 25 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -1192,23 +1218,23 @@ ; RV64-NEXT: vse64.v v12, (a1) ; RV64-NEXT: addi a1, a0, 256 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 5 -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: li a3, 29 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 192 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 37 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a3, a2, 5 +; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 41 +; RV64-NEXT: li a3, 37 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -1216,7 +1242,7 @@ ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a0, a0, 64 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 25 +; RV64-NEXT: li a2, 41 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -225,9 +225,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: vfredusum.vs v8, v8, v24 +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, ptr %x @@ -655,9 +655,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: vfredusum.vs v8, v8, v24 +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <64 x float>, ptr %x @@ -692,12 +692,12 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vfredusum.vs v8, v24, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <64 x half>, ptr %x @@ -1062,9 +1062,9 @@ ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vfmv.s.f v24, fa0 ; CHECK-NEXT: vfadd.vv v8, v8, v16 -; CHECK-NEXT: vfredusum.vs v8, v8, v24 +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <32 x double>, ptr %x @@ -1253,12 +1253,12 @@ ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: 
vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: lui a1, %hi(.LCPI72_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI72_0)(a1) ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vfmv.s.f v24, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI72_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI72_0)(a0) ; CHECK-NEXT: vfmin.vv v8, v8, v16 -; CHECK-NEXT: vfredmin.vs v8, v8, v24 +; CHECK-NEXT: vfmv.s.f v16, fa5 +; CHECK-NEXT: vfredmin.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, ptr %x @@ -1484,33 +1484,19 @@ declare double @llvm.vector.reduce.fmin.v32f64(<32 x double>) define double @vreduce_fmin_v32f64(ptr %x) { -; RV32-LABEL: vreduce_fmin_v32f64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: lui a1, %hi(.LCPI82_0) -; RV32-NEXT: fld fa5, %lo(.LCPI82_0)(a1) -; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: vfmv.s.f v24, fa5 -; RV32-NEXT: vfmin.vv v8, v8, v16 -; RV32-NEXT: vfredmin.vs v8, v8, v24 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmin_v32f64: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI82_0) -; RV64-NEXT: fld fa5, %lo(.LCPI82_0)(a1) -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v16, (a0) -; RV64-NEXT: vfmv.s.f v24, fa5 -; RV64-NEXT: vfmin.vv v8, v8, v16 -; RV64-NEXT: vfredmin.vs v8, v8, v24 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmin_v32f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v16, (a0) +; CHECK-NEXT: lui a0, %hi(.LCPI82_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI82_0)(a0) +; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vfmv.s.f v16, fa5 +; CHECK-NEXT: vfredmin.vs v8, v8, v16 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <32 x double>, ptr %x %red = call double @llvm.vector.reduce.fmin.v32f64(<32 x double> %v) ret double %red @@ -1590,10 +1576,10 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: li a0, -512 -; CHECK-NEXT: vmv.s.x v24, a0 ; CHECK-NEXT: vfmax.vv v8, v8, v16 -; CHECK-NEXT: vfredmax.vs v8, v8, v24 +; CHECK-NEXT: li a0, -512 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vfredmax.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, ptr %x @@ -1819,33 +1805,19 @@ declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>) define double @vreduce_fmax_v32f64(ptr %x) { -; RV32-LABEL: vreduce_fmax_v32f64: -; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: lui a1, %hi(.LCPI97_0) -; RV32-NEXT: fld fa5, %lo(.LCPI97_0)(a1) -; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: vfmv.s.f v24, fa5 -; RV32-NEXT: vfmax.vv v8, v8, v16 -; RV32-NEXT: vfredmax.vs v8, v8, v24 -; RV32-NEXT: vfmv.f.s fa0, v8 -; RV32-NEXT: ret -; -; RV64-LABEL: vreduce_fmax_v32f64: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI97_0) -; RV64-NEXT: fld fa5, %lo(.LCPI97_0)(a1) -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v16, (a0) -; RV64-NEXT: vfmv.s.f v24, fa5 -; RV64-NEXT: vfmax.vv v8, v8, v16 -; RV64-NEXT: vfredmax.vs v8, v8, v24 -; RV64-NEXT: vfmv.f.s fa0, v8 -; RV64-NEXT: ret +; CHECK-LABEL: vreduce_fmax_v32f64: +; CHECK: # %bb.0: +; CHECK-NEXT: 
vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v16, (a0) +; CHECK-NEXT: lui a0, %hi(.LCPI97_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI97_0)(a0) +; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vfmv.s.f v16, fa5 +; CHECK-NEXT: vfredmax.vs v8, v8, v16 +; CHECK-NEXT: vfmv.f.s fa0, v8 +; CHECK-NEXT: ret %v = load <32 x double>, ptr %x %red = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %v) ret double %red diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -141,9 +141,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vredsum.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, ptr %x @@ -515,9 +515,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vredsum.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, ptr %x @@ -534,12 +534,12 @@ ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v24, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, ptr %x @@ -557,12 +557,12 @@ ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 -; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwaddu.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v24, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i8>, ptr %x @@ -882,9 +882,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vredsum.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredsum.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x @@ -901,12 +901,12 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vwadd.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma 
+; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v24, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, ptr %x @@ -924,12 +924,12 @@ ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v16, v8, a0 -; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v12, zero ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vwaddu.vv v24, v8, v16 -; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma -; CHECK-NEXT: vredsum.vs v8, v24, v12 +; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vredsum.vs v8, v24, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i16>, ptr %x @@ -945,10 +945,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_add_v1i64: @@ -1399,9 +1399,9 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: vmv.s.x v24, zero ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: vredsum.vs v8, v8, v24 +; RV32-NEXT: vmv.s.x v16, zero +; RV32-NEXT: vredsum.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1415,9 +1415,9 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) -; RV64-NEXT: vmv.s.x v24, zero ; RV64-NEXT: vadd.vv v8, v8, v16 -; RV64-NEXT: vredsum.vs v8, v8, v24 +; RV64-NEXT: vmv.s.x v16, zero +; RV64-NEXT: vredsum.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -2175,10 +2175,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_and_v1i64: @@ -2549,9 +2549,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vredor.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, ptr %x @@ -2681,9 +2681,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vredor.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, ptr %x @@ -2796,9 +2796,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vor.vv v8, v8, v16 -; CHECK-NEXT: vredor.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x @@ -2813,10 +2813,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, 32 -; 
RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_or_v1i64: @@ -2955,9 +2955,9 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: vmv.s.x v24, zero ; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vredor.vs v8, v8, v24 +; RV32-NEXT: vmv.s.x v16, zero +; RV32-NEXT: vredor.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2971,9 +2971,9 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) -; RV64-NEXT: vmv.s.x v24, zero ; RV64-NEXT: vor.vv v8, v8, v16 -; RV64-NEXT: vredor.vs v8, v8, v24 +; RV64-NEXT: vmv.s.x v16, zero +; RV64-NEXT: vredor.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -3167,9 +3167,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vxor.vv v8, v8, v16 -; CHECK-NEXT: vredxor.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, ptr %x @@ -3299,9 +3299,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vxor.vv v8, v8, v16 -; CHECK-NEXT: vredxor.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, ptr %x @@ -3414,9 +3414,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vxor.vv v8, v8, v16 -; CHECK-NEXT: vredxor.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredxor.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x @@ -3431,10 +3431,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_xor_v1i64: @@ -3573,9 +3573,9 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: vmv.s.x v24, zero ; RV32-NEXT: vxor.vv v8, v8, v16 -; RV32-NEXT: vredxor.vs v8, v8, v24 +; RV32-NEXT: vmv.s.x v16, zero +; RV32-NEXT: vredxor.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -3589,9 +3589,9 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) -; RV64-NEXT: vmv.s.x v24, zero ; RV64-NEXT: vxor.vv v8, v8, v16 -; RV64-NEXT: vredxor.vs v8, v8, v24 +; RV64-NEXT: vmv.s.x v16, zero +; RV64-NEXT: vredxor.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -3792,10 +3792,10 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: li a0, 127 -; CHECK-NEXT: vmv.s.x v24, a0 ; CHECK-NEXT: vmin.vv v8, v8, v16 -; CHECK-NEXT: vredmin.vs v8, v8, v24 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vredmin.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: 
ret %v = load <256 x i8>, ptr %x @@ -4005,11 +4005,11 @@ ; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle16.v v16, (a0) +; RV32-NEXT: vmin.vv v8, v8, v16 ; RV32-NEXT: lui a0, 8 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v24, a0 -; RV32-NEXT: vmin.vv v8, v8, v16 -; RV32-NEXT: vredmin.vs v8, v8, v24 +; RV32-NEXT: vmv.s.x v16, a0 +; RV32-NEXT: vredmin.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; @@ -4020,11 +4020,11 @@ ; RV64-NEXT: vle16.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle16.v v16, (a0) +; RV64-NEXT: vmin.vv v8, v8, v16 ; RV64-NEXT: lui a0, 8 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v24, a0 -; RV64-NEXT: vmin.vv v8, v8, v16 -; RV64-NEXT: vredmin.vs v8, v8, v24 +; RV64-NEXT: vmv.s.x v16, a0 +; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <128 x i16>, ptr %x @@ -4203,11 +4203,11 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle32.v v16, (a0) +; RV32-NEXT: vmin.vv v8, v8, v16 ; RV32-NEXT: lui a0, 524288 ; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: vmv.s.x v24, a0 -; RV32-NEXT: vmin.vv v8, v8, v16 -; RV32-NEXT: vredmin.vs v8, v8, v24 +; RV32-NEXT: vmv.s.x v16, a0 +; RV32-NEXT: vredmin.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: ret ; @@ -4218,11 +4218,11 @@ ; RV64-NEXT: vle32.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle32.v v16, (a0) +; RV64-NEXT: vmin.vv v8, v8, v16 ; RV64-NEXT: lui a0, 524288 ; RV64-NEXT: addiw a0, a0, -1 -; RV64-NEXT: vmv.s.x v24, a0 -; RV64-NEXT: vmin.vv v8, v8, v16 -; RV64-NEXT: vredmin.vs v8, v8, v24 +; RV64-NEXT: vmv.s.x v16, a0 +; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <64 x i32>, ptr %x @@ -4237,10 +4237,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smin_v1i64: @@ -4458,11 +4458,11 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vmin.vv v8, v8, v16 ; RV64-NEXT: li a0, -1 ; RV64-NEXT: srli a0, a0, 1 -; RV64-NEXT: vmv.s.x v24, a0 -; RV64-NEXT: vmin.vv v8, v8, v16 -; RV64-NEXT: vredmin.vs v8, v8, v24 +; RV64-NEXT: vmv.s.x v16, a0 +; RV64-NEXT: vredmin.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -4675,10 +4675,10 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: li a0, -128 -; CHECK-NEXT: vmv.s.x v24, a0 ; CHECK-NEXT: vmax.vv v8, v8, v16 -; CHECK-NEXT: vredmax.vs v8, v8, v24 +; CHECK-NEXT: li a0, -128 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, ptr %x @@ -4814,10 +4814,10 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: lui a0, 1048568 -; CHECK-NEXT: vmv.s.x v24, a0 ; CHECK-NEXT: vmax.vv v8, v8, v16 -; CHECK-NEXT: vredmax.vs v8, v8, v24 +; CHECK-NEXT: lui a0, 1048568 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, ptr %x @@ -4935,10 +4935,10 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; 
CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v24, a0 ; CHECK-NEXT: vmax.vv v8, v8, v16 -; CHECK-NEXT: vredmax.vs v8, v8, v24 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x @@ -4953,10 +4953,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_smax_v1i64: @@ -5164,11 +5164,11 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) +; RV64-NEXT: vmax.vv v8, v8, v16 ; RV64-NEXT: li a0, -1 ; RV64-NEXT: slli a0, a0, 63 -; RV64-NEXT: vmv.s.x v24, a0 -; RV64-NEXT: vmax.vv v8, v8, v16 -; RV64-NEXT: vredmax.vs v8, v8, v24 +; RV64-NEXT: vmv.s.x v16, a0 +; RV64-NEXT: vredmax.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -5660,10 +5660,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umin_v1i64: @@ -6034,9 +6034,9 @@ ; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle8.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vredmaxu.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <256 x i8>, ptr %x @@ -6166,9 +6166,9 @@ ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle16.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vredmaxu.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <128 x i16>, ptr %x @@ -6281,9 +6281,9 @@ ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: vmv.s.x v24, zero ; CHECK-NEXT: vmaxu.vv v8, v8, v16 -; CHECK-NEXT: vredmaxu.vs v8, v8, v24 +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vredmaxu.vs v8, v8, v16 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <64 x i32>, ptr %x @@ -6298,10 +6298,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_umax_v1i64: @@ -6440,9 +6440,9 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle64.v v16, (a0) -; RV32-NEXT: vmv.s.x v24, zero ; RV32-NEXT: vmaxu.vv v8, v8, v16 -; RV32-NEXT: vredmaxu.vs v8, v8, v24 +; RV32-NEXT: vmv.s.x v16, zero +; RV32-NEXT: vredmaxu.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -6456,9 +6456,9 @@ ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle64.v v16, (a0) -; RV64-NEXT: vmv.s.x v24, zero ; RV64-NEXT: vmaxu.vv v8, v8, v16 -; RV64-NEXT: 
vredmaxu.vs v8, v8, v24 +; RV64-NEXT: vmv.s.x v16, zero +; RV64-NEXT: vredmaxu.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <32 x i64>, ptr %x @@ -7045,10 +7045,10 @@ ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: li a0, 32 -; RV32-NEXT: vsrl.vx v9, v8, a0 -; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vreduce_mul_v1i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -1160,25 +1160,25 @@ ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v2, v0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: addi a0, a2, -16 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: vmfeq.vv v1, v16, v8, v0.t ; CHECK-NEXT: bltu a2, a0, .LBB87_2 @@ -1186,7 +1186,7 @@ ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB87_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v2 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -327,25 +327,25 @@ ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: addi a0, a2, -16 +; 
CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: vfsgnj.vv v16, v16, v8, v0.t ; CHECK-NEXT: bltu a2, a0, .LBB26_2 @@ -353,7 +353,7 @@ ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -657,53 +657,64 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a2) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a2, 128 -; CHECK-NEXT: addi a2, a4, -16 -; CHECK-NEXT: sltu a3, a4, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: addi a3, a0, 128 -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: vle64.v v8, (a3) -; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a2, a0, 128 +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: addi a0, a4, -16 +; CHECK-NEXT: sltu a1, a4, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: 
vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a4, a0, .LBB50_2 ; CHECK-NEXT: # %bb.1: @@ -712,13 +723,13 @@ ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -726,12 +737,13 @@ ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll @@ -327,25 +327,25 @@ ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: addi a0, a2, -16 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: vfmax.vv v16, 
v16, v8, v0.t ; CHECK-NEXT: bltu a2, a0, .LBB26_2 @@ -353,7 +353,7 @@ ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll @@ -327,25 +327,25 @@ ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: addi a0, a2, -16 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: addi a1, a2, -16 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: vfmin.vv v16, v16, v8, v0.t ; CHECK-NEXT: bltu a2, a0, .LBB26_2 @@ -353,7 +353,7 @@ ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: .LBB26_2: ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v1 +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll @@ -657,53 +657,64 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 2, e8, 
mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a2) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a2, 128 -; CHECK-NEXT: addi a2, a4, -16 -; CHECK-NEXT: sltu a3, a4, a2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: addi a3, a0, 128 -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: vle64.v v8, (a3) -; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: addi a2, a0, 128 +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: addi a0, a4, -16 +; CHECK-NEXT: sltu a1, a4, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: li a0, 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a4, a0, .LBB50_2 ; CHECK-NEXT: # %bb.1: @@ -712,13 +723,13 @@ ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -726,12 +737,13 @@ ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1703,12 +1703,12 @@ ; RV32-NEXT: .LBB79_2: ; RV32-NEXT: vsetvli 
zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a0, a1, -16 ; RV32-NEXT: sltu a1, a1, a0 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -1771,14 +1771,14 @@ ; RV32-NEXT: .LBB80_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a1, a2, -16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1856,14 +1856,14 @@ ; RV32-NEXT: .LBB81_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a1, a2, -16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1943,14 +1943,14 @@ ; RV32-NEXT: .LBB82_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: addi a1, a2, -16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -453,15 +453,42 @@ define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) { ; CHECK-LABEL: select_evl_v32i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi 
a0, a0, 128 -; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 17) ret <32 x i64> %v @@ -571,46 +598,35 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v24, v0 -; CHECK-NEXT: addi a1, a2, -32 -; CHECK-NEXT: sltu a3, a2, a1 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a1, a3, a1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: addi a4, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a3, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 32 +; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: .LBB35_2: -; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: addi a0, a2, -32 +; CHECK-NEXT: sltu a1, a2, a0 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git 
a/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir b/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir --- a/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir +++ b/llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir @@ -10,15 +10,21 @@ ; CHECK-LABEL: name: test_earlyclobber ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: undef %0.sub_vrm2_0:vrn2m2 = PseudoVLE32_V_M2 $x10, 1, 5 /* e32 */ - ; CHECK-NEXT: %0.sub_vrm2_1:vrn2m2 = PseudoVLE32_V_M2 $x10, 1, 5 /* e32 */ - ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 $x10, 1, 5 /* e32 */ - ; CHECK-NEXT: undef early-clobber %2.sub_vrm2_0:vrn2m2 = PseudoVRGATHER_VI_M2 %0.sub_vrm2_0, 0, 1, 5 /* e32 */, implicit $vl, implicit $vtype - ; CHECK-NEXT: %2.sub_vrm2_1:vrn2m2 = COPY %0.sub_vrm2_1 - ; CHECK-NEXT: PseudoVSUXSEG2EI32_V_M2_M2 %2, $x10, [[PseudoVLE32_V_M2_]], 1, 5 /* e32 */, implicit $vl, implicit $vtype - undef %0.sub_vrm2_0:vrn2m2 = PseudoVLE32_V_M2 $x10, 1, 5 - %0.sub_vrm2_1:vrn2m2 = PseudoVLE32_V_M2 $x10, 1, 5 - %1:vrm2 = PseudoVLE32_V_M2 $x10, 1, 5 + ; CHECK-NEXT: %pt:vrm2 = IMPLICIT_DEF + ; CHECK-NEXT: undef %1.sub_vrm2_0:vrn2m2 = PseudoVLE32_V_M2 %pt, $x10, 1, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: %pt2:vrm2 = IMPLICIT_DEF + ; CHECK-NEXT: %1.sub_vrm2_1:vrn2m2 = PseudoVLE32_V_M2 %pt2, $x10, 1, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: %pt3:vrm2 = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 %pt3, $x10, 1, 5 /* e32 */, 0 /* tu, mu */ + ; CHECK-NEXT: undef early-clobber %5.sub_vrm2_0:vrn2m2 = PseudoVRGATHER_VI_M2 %1.sub_vrm2_0, 0, 1, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %5.sub_vrm2_1:vrn2m2 = COPY %1.sub_vrm2_1 + ; CHECK-NEXT: PseudoVSUXSEG2EI32_V_M2_M2 %5, $x10, [[PseudoVLE32_V_M2_]], 1, 5 /* e32 */, implicit $vl, implicit $vtype + %pt:vrm2 = IMPLICIT_DEF + undef %0.sub_vrm2_0:vrn2m2 = PseudoVLE32_V_M2 %pt, $x10, 1, 5, 0 + %pt2:vrm2 = IMPLICIT_DEF + %0.sub_vrm2_1:vrn2m2 = PseudoVLE32_V_M2 %pt2, $x10, 1, 5, 0 + %pt3:vrm2 = IMPLICIT_DEF + %1:vrm2 = PseudoVLE32_V_M2 %pt3, $x10, 1, 5, 0 undef early-clobber %2.sub_vrm2_0:vrn2m2 = PseudoVRGATHER_VI_M2 %0.sub_vrm2_0:vrn2m2, 0, 1, 5, implicit $vl, implicit $vtype %2.sub_vrm2_1:vrn2m2 = COPY %0.sub_vrm2_1:vrn2m2 PseudoVSUXSEG2EI32_V_M2_M2 %2:vrn2m2, $x10, %1:vrm2, 1, 5, implicit $vl, implicit $vtype diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -11,6 +11,12 @@ ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: # implicit-def: $v8 +; SPILL-O0-NEXT: # implicit-def: $v9 +; SPILL-O0-NEXT: # implicit-def: $v10 +; SPILL-O0-NEXT: # implicit-def: $v9 +; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9 +; SPILL-O0-NEXT: vmv1r.v v9, v10 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv1r.v v8, v9 @@ -65,6 +71,12 @@ ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: # implicit-def: $v8 +; SPILL-O0-NEXT: # implicit-def: $v9 +; SPILL-O0-NEXT: # implicit-def: $v10 +; SPILL-O0-NEXT: # implicit-def: $v9 +; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9 +; SPILL-O0-NEXT: vmv1r.v v9, v10 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv1r.v v8, v9 @@ -119,6 +131,12 @@ ; SPILL-O0-NEXT: csrr a2, 
vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: # implicit-def: $v8m2 +; SPILL-O0-NEXT: # implicit-def: $v10m2 +; SPILL-O0-NEXT: # implicit-def: $v12m2 +; SPILL-O0-NEXT: # implicit-def: $v10m2 +; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2 +; SPILL-O0-NEXT: vmv2r.v v10, v12 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv2r.v v8, v10 @@ -176,6 +194,12 @@ ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 2 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: # implicit-def: $v8m4 +; SPILL-O0-NEXT: # implicit-def: $v12m4 +; SPILL-O0-NEXT: # implicit-def: $v16m4 +; SPILL-O0-NEXT: # implicit-def: $v12m4 +; SPILL-O0-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4 +; SPILL-O0-NEXT: vmv4r.v v12, v16 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv4r.v v8, v12 @@ -233,6 +257,15 @@ ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: # implicit-def: $v8m2 +; SPILL-O0-NEXT: # implicit-def: $v10m2 +; SPILL-O0-NEXT: # implicit-def: $v16m2 +; SPILL-O0-NEXT: # implicit-def: $v10m2 +; SPILL-O0-NEXT: # implicit-def: $v14m2 +; SPILL-O0-NEXT: # implicit-def: $v10m2 +; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2_v12m2 +; SPILL-O0-NEXT: vmv2r.v v10, v16 +; SPILL-O0-NEXT: vmv2r.v v12, v14 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O0-NEXT: vlseg3e32.v v8, (a0) ; SPILL-O0-NEXT: vmv2r.v v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll @@ -11,6 +11,12 @@ ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: # implicit-def: $v8 +; SPILL-O0-NEXT: # implicit-def: $v9 +; SPILL-O0-NEXT: # implicit-def: $v10 +; SPILL-O0-NEXT: # implicit-def: $v9 +; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9 +; SPILL-O0-NEXT: vmv1r.v v9, v10 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv1r.v v8, v9 @@ -65,6 +71,12 @@ ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: # implicit-def: $v8 +; SPILL-O0-NEXT: # implicit-def: $v9 +; SPILL-O0-NEXT: # implicit-def: $v10 +; SPILL-O0-NEXT: # implicit-def: $v9 +; SPILL-O0-NEXT: # kill: def $v8 killed $v8 def $v8_v9 +; SPILL-O0-NEXT: vmv1r.v v9, v10 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv1r.v v8, v9 @@ -119,6 +131,12 @@ ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: # implicit-def: $v8m2 +; SPILL-O0-NEXT: # implicit-def: $v10m2 +; SPILL-O0-NEXT: # implicit-def: $v12m2 +; SPILL-O0-NEXT: # implicit-def: $v10m2 +; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2 +; SPILL-O0-NEXT: vmv2r.v v10, v12 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv2r.v v8, v10 @@ -176,6 +194,12 @@ ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 2 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: # implicit-def: $v8m4 +; SPILL-O0-NEXT: # implicit-def: $v12m4 +; SPILL-O0-NEXT: # implicit-def: $v16m4 +; SPILL-O0-NEXT: # implicit-def: $v12m4 +; 
SPILL-O0-NEXT: # kill: def $v8m4 killed $v8m4 def $v8m4_v12m4 +; SPILL-O0-NEXT: vmv4r.v v12, v16 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O0-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O0-NEXT: vmv4r.v v8, v12 @@ -233,6 +257,15 @@ ; SPILL-O0-NEXT: csrr a2, vlenb ; SPILL-O0-NEXT: slli a2, a2, 1 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: # implicit-def: $v8m2 +; SPILL-O0-NEXT: # implicit-def: $v10m2 +; SPILL-O0-NEXT: # implicit-def: $v16m2 +; SPILL-O0-NEXT: # implicit-def: $v10m2 +; SPILL-O0-NEXT: # implicit-def: $v14m2 +; SPILL-O0-NEXT: # implicit-def: $v10m2 +; SPILL-O0-NEXT: # kill: def $v8m2 killed $v8m2 def $v8m2_v10m2_v12m2 +; SPILL-O0-NEXT: vmv2r.v v10, v16 +; SPILL-O0-NEXT: vmv2r.v v12, v14 ; SPILL-O0-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O0-NEXT: vlseg3e32.v v8, (a0) ; SPILL-O0-NEXT: vmv2r.v v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll @@ -1164,33 +1164,33 @@ ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a4, a0, a1 -; CHECK-NEXT: vl8r.v v8, (a4) -; CHECK-NEXT: vl8r.v v0, (a0) +; CHECK-NEXT: add a2, a0, a1 +; CHECK-NEXT: vl8r.v v8, (a2) +; CHECK-NEXT: sub a2, a3, a1 +; CHECK-NEXT: sltu a4, a3, a2 +; CHECK-NEXT: addi a4, a4, -1 +; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: sub a0, a3, a1 -; CHECK-NEXT: vlm.v v0, (a2) -; CHECK-NEXT: sltu a2, a3, a0 -; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: and a0, a2, a0 -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: and a2, a4, a2 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vv v1, v16, v8, v0.t ; CHECK-NEXT: bltu a3, a1, .LBB96_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB96_2: ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vmv1r.v v0, v2 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll @@ -36,16 +36,16 @@ ; CHECK-NEXT: vsrl.vx v12, v8, a1 ; CHECK-NEXT: lui a2, 16 ; CHECK-NEXT: addi a2, a2, -256 -; CHECK-NEXT: vand.vx v12, v12, a2 -; CHECK-NEXT: vor.vv v10, v12, v10 -; CHECK-NEXT: vsrl.vi v12, v8, 24 ; CHECK-NEXT: mv a3, sp ; CHECK-NEXT: vlse64.v v14, (a3), zero +; CHECK-NEXT: vand.vx v12, v12, a2 +; CHECK-NEXT: vor.vv v10, v12, v10 +; CHECK-NEXT: vsrl.vi v12, v8, 8 +; CHECK-NEXT: vand.vv v12, v12, v14 +; CHECK-NEXT: vsrl.vi v16, v8, 24 ; CHECK-NEXT: lui a3, 4080 -; CHECK-NEXT: vand.vx v12, v12, a3 -; CHECK-NEXT: 
vsrl.vi v16, v8, 8 -; CHECK-NEXT: vand.vv v16, v16, v14 -; CHECK-NEXT: vor.vv v12, v16, v12 +; CHECK-NEXT: vand.vx v16, v16, a3 +; CHECK-NEXT: vor.vv v12, v12, v16 ; CHECK-NEXT: vor.vv v10, v12, v10 ; CHECK-NEXT: vand.vv v12, v8, v14 ; CHECK-NEXT: vsll.vi v12, v12, 8 diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -501,8 +501,8 @@ ; RV32-NEXT: lui a0, 797989 ; RV32-NEXT: addi a0, a0, -683 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: vid.v v16 ; RV32-NEXT: vmul.vv v8, v16, v8 @@ -552,8 +552,8 @@ ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vid.v v8 ; RV32-NEXT: vadd.vv v16, v8, v16 @@ -580,8 +580,8 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vid.v v8 ; RV32-NEXT: vadd.vv v8, v8, v8 @@ -667,8 +667,8 @@ ; RV32-NEXT: mulhu a0, a0, a2 ; RV32-NEXT: add a0, a0, a1 ; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero ; RV32-NEXT: mv a0, sp ; RV32-NEXT: vlse64.v v16, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir --- a/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir +++ b/llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir @@ -9,22 +9,24 @@ ; CHECK-LABEL: name: test_M4_sub_vrm1_0 ; CHECK: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_0 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_1 - ; CHECK-NEXT: early-clobber %4:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M4 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed 
[[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm4 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm1_0 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -43,22 +45,24 @@ ; CHECK-LABEL: name: test_M4_sub_vrm1_1 ; CHECK: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_1 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_0 - ; CHECK-NEXT: early-clobber %4:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M4 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm4 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm1_1 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -77,22 +81,24 @@ ; CHECK-LABEL: name: test_M4_sub_vrm1_2 ; CHECK: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_2 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], 
%subreg.sub_vrm2_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_3 - ; CHECK-NEXT: early-clobber %4:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M4 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm4 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm1_2 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -111,22 +117,24 @@ ; CHECK-LABEL: name: test_M4_sub_vrm1_3 ; CHECK: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_3 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_2 - ; CHECK-NEXT: early-clobber %4:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M4 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm4 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm1_3 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -145,20 +153,22 @@ ; CHECK-LABEL: name: test_M4_sub_vrm2_0 ; CHECK: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; CHECK-NEXT: 
[[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vrm2 = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], %subreg.sub_vrm2_0 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 - ; CHECK-NEXT: early-clobber %4:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M4 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm4 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vrm2 = PseudoVLE32_V_M2 killed %7:gpr, 0, 5 + %pt:vrm2 = IMPLICIT_DEF + %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm2_0 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -177,20 +187,22 @@ ; CHECK-LABEL: name: test_M4_sub_vrm2_1 ; CHECK: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vrm2 = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm4 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], %subreg.sub_vrm2_1 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 - ; CHECK-NEXT: early-clobber %4:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M4 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm4 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vrm2 = PseudoVLE32_V_M2 killed %7:gpr, 0, 5 + %pt:vrm2 = IMPLICIT_DEF + %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm2_1 dead 
$x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -210,7 +222,8 @@ ; CHECK-LABEL: name: test_M8_sub_vrm1_0 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_0 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 @@ -219,15 +232,16 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_1 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_0 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -246,7 +260,8 @@ ; CHECK-LABEL: name: test_M8_sub_vrm1_1 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_1 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 @@ -255,15 +270,16 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_0 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: 
early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_1 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -282,7 +298,8 @@ ; CHECK-LABEL: name: test_M8_sub_vrm1_2 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_2 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 @@ -291,15 +308,16 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_3 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_2 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -318,7 +336,8 @@ ; CHECK-LABEL: name: test_M8_sub_vrm1_3 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG 
[[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_3 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 @@ -327,15 +346,16 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_2 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_3 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -354,7 +374,8 @@ ; CHECK-LABEL: name: test_M8_sub_vrm1_4 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_4 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 @@ -363,15 +384,16 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_3 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_5 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 
0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_4 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -390,7 +412,8 @@ ; CHECK-LABEL: name: test_M8_sub_vrm1_5 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_5 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 @@ -399,15 +422,16 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_3 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_4 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_5 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -426,7 +450,8 @@ ; CHECK-LABEL: name: test_M8_sub_vrm1_6 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_6 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 @@ -435,15 +460,16 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_2 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_7 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed 
[[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_6 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -462,7 +488,8 @@ ; CHECK-LABEL: name: test_M8_sub_vrm1_7 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M1_]], %subreg.sub_vrm1_7 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 @@ -471,15 +498,16 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_2 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_6 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vr = PseudoVLE32_V_M1 killed %7:gpr, 0, 5 + %pt:vr = IMPLICIT_DEF + %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_7 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -498,22 +526,24 @@ ; CHECK-LABEL: name: test_M8_sub_vrm2_0 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vrm2 = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 %pt, killed [[ADDI]], 0, 
5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], %subreg.sub_vrm2_0 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vrm2 = PseudoVLE32_V_M2 killed %7:gpr, 0, 5 + %pt:vrm2 = IMPLICIT_DEF + %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm2_0 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -532,22 +562,24 @@ ; CHECK-LABEL: name: test_M8_sub_vrm2_1 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vrm2 = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], %subreg.sub_vrm2_1 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vrm2 = PseudoVLE32_V_M2 killed %7:gpr, 0, 5 + 
%pt:vrm2 = IMPLICIT_DEF + %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm2_1 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -566,22 +598,24 @@ ; CHECK-LABEL: name: test_M8_sub_vrm2_2 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vrm2 = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], %subreg.sub_vrm2_2 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_3 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vrm2 = PseudoVLE32_V_M2 killed %7:gpr, 0, 5 + %pt:vrm2 = IMPLICIT_DEF + %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm2_2 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -600,22 +634,24 @@ ; CHECK-LABEL: name: test_M8_sub_vrm2_3 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vrm2 = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M2_]], %subreg.sub_vrm2_3 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_2 - ; CHECK-NEXT: early-clobber 
%4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vrm2 = PseudoVLE32_V_M2 killed %7:gpr, 0, 5 + %pt:vrm2 = IMPLICIT_DEF + %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm2_3 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -634,20 +670,22 @@ ; CHECK-LABEL: name: test_M8_sub_vrm4_0 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M4_:%[0-9]+]]:vrm4 = PseudoVLE32_V_M4 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vrm4 = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M4_:%[0-9]+]]:vrm4 = PseudoVLE32_V_M4 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M4_]], %subreg.sub_vrm4_0 ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vrm4 = PseudoVLE32_V_M4 killed %7:gpr, 0, 5 + %pt:vrm4 = IMPLICIT_DEF + %5:vrm4 = PseudoVLE32_V_M4 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm4_0 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype @@ -666,20 +704,22 @@ ; CHECK-LABEL: name: test_M8_sub_vrm4_1 ; CHECK: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 8 - ; CHECK-NEXT: [[PseudoVLE32_V_M4_:%[0-9]+]]:vrm4 = PseudoVLE32_V_M4 killed [[ADDI]], 0, 5 /* e32 */ + ; CHECK-NEXT: %pt:vrm4 = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE32_V_M4_:%[0-9]+]]:vrm4 = PseudoVLE32_V_M4 %pt, killed [[ADDI]], 0, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrm8 = INSERT_SUBREG [[DEF]], [[PseudoVLE32_V_M4_]], %subreg.sub_vrm4_1 ; CHECK-NEXT: dead $x0 = 
PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 - ; CHECK-NEXT: early-clobber %4:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 - ; CHECK-NEXT: PseudoVSE32_V_M8 killed %4, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 ; CHECK-NEXT: $x10 = COPY [[COPY]] ; CHECK-NEXT: PseudoRET implicit $x10 %1:vrm8 = IMPLICIT_DEF %7:gpr = ADDI $x0, 8 - %5:vrm4 = PseudoVLE32_V_M4 killed %7:gpr, 0, 5 + %pt:vrm4 = IMPLICIT_DEF + %5:vrm4 = PseudoVLE32_V_M4 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm4_1 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype diff --git a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll --- a/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vleff-vlseg2ff-output.ll @@ -14,7 +14,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[PseudoVLE8FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1 [[COPY1]], [[COPY]], 3 /* e8 */, implicit-def dead $vl + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVLE8FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1 [[DEF]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.p, align 1) ; CHECK-NEXT: $x10 = COPY [[PseudoVLE8FF_V_M1_1]] ; CHECK-NEXT: PseudoRET implicit $x10 entry: @@ -31,8 +32,8 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8 - ; CHECK-NEXT: [[PseudoVLE8FF_V_M1_TU:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_TU1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1_TU [[COPY2]], [[COPY1]], [[COPY]], 3 /* e8 */, implicit-def dead $vl - ; CHECK-NEXT: $x10 = COPY [[PseudoVLE8FF_V_M1_TU1]] + ; CHECK-NEXT: [[PseudoVLE8FF_V_M1_:%[0-9]+]]:vr, [[PseudoVLE8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1 [[COPY2]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.p, align 1) + ; CHECK-NEXT: $x10 = COPY [[PseudoVLE8FF_V_M1_1]] ; CHECK-NEXT: PseudoRET implicit $x10 entry: %0 = call { <vscale x 8 x i8>, i64 } @llvm.riscv.vleff.nxv8i8(<vscale x 8 x i8> %merge, <vscale x 8 x i8>* %p, i64 %vl) @@ -50,7 +51,7 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8 ; CHECK-NEXT: $v0 = COPY [[COPY1]] - ; CHECK-NEXT: [[PseudoVLE8FF_V_M1_MASK:%[0-9]+]]:vrnov0, [[PseudoVLE8FF_V_M1_MASK1:%[0-9]+]]:gpr = PseudoVLE8FF_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vl + ; CHECK-NEXT: [[PseudoVLE8FF_V_M1_MASK:%[0-9]+]]:vrnov0, [[PseudoVLE8FF_V_M1_MASK1:%[0-9]+]]:gpr = 
PseudoVLE8FF_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vl :: (load unknown-size from %ir.p, align 1) ; CHECK-NEXT: $x10 = COPY [[PseudoVLE8FF_V_M1_MASK1]] ; CHECK-NEXT: PseudoRET implicit $x10 entry: @@ -66,7 +67,12 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 [[COPY1]], [[COPY]], 3 /* e8 */, implicit-def dead $vl + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF3:%[0-9]+]]:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vrn2m1 = REG_SEQUENCE [[DEF]], %subreg.sub_vrm1_0, [[DEF2]], %subreg.sub_vrm1_1 + ; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 [[REG_SEQUENCE]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1) ; CHECK-NEXT: $x10 = COPY [[PseudoVLSEG2E8FF_V_M1_1]] ; CHECK-NEXT: PseudoRET implicit $x10 entry: @@ -84,8 +90,8 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vrn2m1 = REG_SEQUENCE [[COPY2]], %subreg.sub_vrm1_0, [[COPY2]], %subreg.sub_vrm1_1 - ; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_TU:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_TU1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1_TU [[REG_SEQUENCE]], [[COPY1]], [[COPY]], 3 /* e8 */, implicit-def dead $vl - ; CHECK-NEXT: $x10 = COPY [[PseudoVLSEG2E8FF_V_M1_TU1]] + ; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_:%[0-9]+]]:vrn2m1, [[PseudoVLSEG2E8FF_V_M1_1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1 [[REG_SEQUENCE]], [[COPY1]], [[COPY]], 3 /* e8 */, 2 /* tu, ma */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1) + ; CHECK-NEXT: $x10 = COPY [[PseudoVLSEG2E8FF_V_M1_1]] ; CHECK-NEXT: PseudoRET implicit $x10 entry: %0 = tail call {<vscale x 8 x i8>,<vscale x 8 x i8>, i64} @llvm.riscv.vlseg2ff.nxv8i8(<vscale x 8 x i8> %val, <vscale x 8 x i8> %val, i8* %base, i64 %vl) @@ -104,7 +110,7 @@ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vr = COPY $v8 ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vrn2m1nov0 = REG_SEQUENCE [[COPY3]], %subreg.sub_vrm1_0, [[COPY3]], %subreg.sub_vrm1_1 ; CHECK-NEXT: $v0 = COPY [[COPY1]] - ; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_MASK:%[0-9]+]]:vrn2m1nov0, [[PseudoVLSEG2E8FF_V_M1_MASK1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1_MASK [[REG_SEQUENCE]], [[COPY2]], $v0, [[COPY]], 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vl + ; CHECK-NEXT: [[PseudoVLSEG2E8FF_V_M1_MASK:%[0-9]+]]:vrn2m1nov0, [[PseudoVLSEG2E8FF_V_M1_MASK1:%[0-9]+]]:gpr = PseudoVLSEG2E8FF_V_M1_MASK [[REG_SEQUENCE]], [[COPY2]], $v0, [[COPY]], 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vl :: (load unknown-size from %ir.base, align 1) ; CHECK-NEXT: $x10 = COPY [[PseudoVLSEG2E8FF_V_M1_MASK1]] ; CHECK-NEXT: PseudoRET implicit $x10 entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir --- a/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir @@ -13,10 +13,10 @@ ; CHECK: liveins: $x14, $x16 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 undef 
$v28m4, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v12m2 = VMV2R_V $v28m2 $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype - $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype + $v28m4 = PseudoVLE32_V_M4 undef $v28m4, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype $v12m2 = COPY $v28m2 ... --- @@ -30,10 +30,10 @@ ; CHECK: liveins: $x14, $x16 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 undef $v28m4, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v12m4 = PseudoVMV_V_V_M4 undef $v12m4, $v28m4, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype - $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype + $v28m4 = PseudoVLE32_V_M4 undef $v28m4, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype $v12m4 = COPY $v28m4 ... --- @@ -82,11 +82,11 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: $v28m4 = PseudoVMV_V_I_M4 undef $v28m4, 0, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype - ; CHECK-NEXT: $v4m4, $x0 = PseudoVLE32FF_V_M4 $x16, $noreg, 5 /* e32 */, implicit-def $vl + ; CHECK-NEXT: $v4m4, $x0 = PseudoVLE32FF_V_M4 undef $v4m4, $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit-def $vl ; CHECK-NEXT: $v12m4 = VMV4R_V $v28m4 $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype $v28m4 = PseudoVMV_V_I_M4 undef $v28m4, 0, $noreg, 5, 0, implicit $vl, implicit $vtype - $v4m4,$x0 = PseudoVLE32FF_V_M4 $x16, $noreg, 5, implicit-def $vl + $v4m4,$x0 = PseudoVLE32FF_V_M4 undef $v4m4, $x16, $noreg, 5, 0, implicit-def $vl $v12m4 = COPY $v28m4 ... 
--- @@ -101,18 +101,18 @@ ; CHECK: liveins: $x14, $x16, $x17, $x18 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 undef $v28m4, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $x15 = PseudoVSETVLI $x17, 73 /* e16, m2, ta, mu */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4 /* e16 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v0m2 = PseudoVLE32_V_M2 undef $v0m2, $x18, $noreg, 4 /* e16 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $x0 = PseudoVSETVLIX0 $x0, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: $v4m4 = PseudoVLE32_V_M4 killed $x18, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: $v4m4 = PseudoVLE32_V_M4 undef $v4m4, killed $x18, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v12m4 = VMV4R_V $v28m4 $x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype - $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype + $v28m4 = PseudoVLE32_V_M4 undef $v28m4, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype $x15 = PseudoVSETVLI $x17, 73, implicit-def $vl, implicit-def $vtype - $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4, implicit $vl, implicit $vtype + $v0m2 = PseudoVLE32_V_M2 undef $v0m2, $x18, $noreg, 4, 0, implicit $vl, implicit $vtype $x0 = PseudoVSETVLIX0 $x0, 82, implicit-def $vl, implicit-def $vtype - $v4m4 = PseudoVLE32_V_M4 killed $x18, $noreg, 5, implicit $vl, implicit $vtype + $v4m4 = PseudoVLE32_V_M4 undef $v4m4, killed $x18, $noreg, 5, 0, implicit $vl, implicit $vtype $v12m4 = COPY $v28m4 ... 
---
@@ -127,18 +127,18 @@
; CHECK: liveins: $x14, $x16, $x17, $x18
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 undef $v28m4, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x0 = PseudoVSETVLIX0 $x0, 73 /* e16, m2, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4 /* e16 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v0m2 = PseudoVLE32_V_M2 undef $v0m2, $x18, $noreg, 4 /* e16 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x0 = PseudoVSETVLIX0 $x0, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v4m4 = PseudoVLE32_V_M4 killed $x18, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v4m4 = PseudoVLE32_V_M4 undef $v4m4, killed $x18, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v12m4 = PseudoVMV_V_V_M4 undef $v12m4, $v28m4, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
$x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
- $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+ $v28m4 = PseudoVLE32_V_M4 undef $v28m4, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype
$x0 = PseudoVSETVLIX0 $x0, 73, implicit-def $vl, implicit-def $vtype
- $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4, implicit $vl, implicit $vtype
+ $v0m2 = PseudoVLE32_V_M2 undef $v0m2, $x18, $noreg, 4, 0, implicit $vl, implicit $vtype
$x0 = PseudoVSETVLIX0 $x0, 82, implicit-def $vl, implicit-def $vtype
- $v4m4 = PseudoVLE32_V_M4 killed $x18, $noreg, 5, implicit $vl, implicit $vtype
+ $v4m4 = PseudoVLE32_V_M4 undef $v4m4, killed $x18, $noreg, 5, 0, implicit $vl, implicit $vtype
$v12m4 = COPY $v28m4
...
---
@@ -153,14 +153,14 @@
; CHECK: liveins: $x14, $x16, $x17, $x18
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 undef $v28m4, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x0 = PseudoVSETVLIX0 $x0, 73 /* e16, m2, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4 /* e16 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v0m2 = PseudoVLE32_V_M2 undef $v0m2, $x18, $noreg, 4 /* e16 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v12m4 = VMV4R_V $v28m4
$x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
- $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+ $v28m4 = PseudoVLE32_V_M4 undef $v28m4, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype
$x0 = PseudoVSETVLIX0 $x0, 73, implicit-def $vl, implicit-def $vtype
- $v0m2 = PseudoVLE32_V_M2 $x18, $noreg, 4, implicit $vl, implicit $vtype
+ $v0m2 = PseudoVLE32_V_M2 undef $v0m2, $x18, $noreg, 4, 0, implicit $vl, implicit $vtype
$v12m4 = COPY $v28m4
...
---
@@ -174,13 +174,13 @@
; CHECK: liveins: $x16, $x17
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETIVLI 4, 73 /* e16, m2, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v26m2 = PseudoVLE16_V_M2 killed $x16, $noreg, 4 /* e16 */, implicit $vl, implicit $vtype
- ; CHECK-NEXT: $v8m2 = PseudoVLE16_V_M2 killed $x17, $noreg, 4 /* e16 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v26m2 = PseudoVLE16_V_M2 undef $v26m2, killed $x16, $noreg, 4 /* e16 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v8m2 = PseudoVLE16_V_M2 undef $v8m2, killed $x17, $noreg, 4 /* e16 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: early-clobber $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2, $noreg, 4 /* e16 */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v12m2 = VMV2R_V $v28m2
$x15 = PseudoVSETIVLI 4, 73, implicit-def $vl, implicit-def $vtype
- $v26m2 = PseudoVLE16_V_M2 killed $x16, $noreg, 4, implicit $vl, implicit $vtype
- $v8m2 = PseudoVLE16_V_M2 killed $x17, $noreg, 4, implicit $vl, implicit $vtype
+ $v26m2 = PseudoVLE16_V_M2 undef $v26m2, killed $x16, $noreg, 4, 0, implicit $vl, implicit $vtype
+ $v8m2 = PseudoVLE16_V_M2 undef $v8m2, killed $x17, $noreg, 4, 0, implicit $vl, implicit $vtype
$v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2, $noreg, 4, implicit $vl, implicit $vtype
$v12m2 = COPY $v28m2
...
@@ -196,11 +196,11 @@
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 82 /* e32, m4, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v28m4 = PseudoVLE32_V_M4 undef $v28m4, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x0 = PseudoVSETVLIX0 $x0, 74 /* e16, m4, ta, mu */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: $v12m4 = VMV4R_V $v28m4
$x15 = PseudoVSETVLI $x14, 82, implicit-def $vl, implicit-def $vtype
- $v28m4 = PseudoVLE32_V_M4 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+ $v28m4 = PseudoVLE32_V_M4 undef $v28m4, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype
$x0 = PseudoVSETVLIX0 $x0, 74, implicit-def $vl, implicit-def $vtype
$v12m4 = COPY $v28m4
...
@@ -235,10 +235,10 @@
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 80 /* e32, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v8_v9 = PseudoVLSEG2E32_V_M1 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v8_v9 = PseudoVLSEG2E32_V_M1 undef $v8_v9, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v10 = VMV1R_V $v8
$x15 = PseudoVSETVLI $x14, 80, implicit-def $vl, implicit-def $vtype
- $v8_v9 = PseudoVLSEG2E32_V_M1 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+ $v8_v9 = PseudoVLSEG2E32_V_M1 undef $v8_v9, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype
$v10 = COPY $v8
...
---
@@ -252,11 +252,11 @@
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 80 /* e32, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v8_v9 = PseudoVLSEG2E32_V_M1 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v8_v9 = PseudoVLSEG2E32_V_M1 undef $v8_v9, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v10 = PseudoVMV_V_V_M1 undef $v10, $v8, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v11 = PseudoVMV_V_V_M1 undef $v11, $v9, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
$x15 = PseudoVSETVLI $x14, 80, implicit-def $vl, implicit-def $vtype
- $v8_v9 = PseudoVLSEG2E32_V_M1 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+ $v8_v9 = PseudoVLSEG2E32_V_M1 undef $v8_v9, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype
$v10_v11 = COPY $v8_v9
...
---
@@ -270,10 +270,10 @@
; CHECK: liveins: $x14, $x16
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x15 = PseudoVSETVLI $x14, 87 /* e32, mf2, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v28 = PseudoVLE32_V_MF2 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v28 = PseudoVLE32_V_MF2 undef $v28, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v12 = VMV1R_V $v28
$x15 = PseudoVSETVLI $x14, 87, implicit-def $vl, implicit-def $vtype
- $v28 = PseudoVLE32_V_MF2 killed $x16, $noreg, 5, implicit $vl, implicit $vtype
+ $v28 = PseudoVLE32_V_MF2 undef $v28, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype
$v12 = COPY $v28
...
---
@@ -287,9 +287,9 @@
; CHECK: liveins: $x12, $x14, $x16
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x0 = PseudoVSETVLI $x14, 80 /* e32, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 killed $x12, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 undef $v8_v9_v10_v11_v12_v13_v14_v15, killed $x12, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x0 = PseudoVSETIVLI 10, 80 /* e32, m1, ta, mu */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v15 = PseudoVLE32_V_M1 killed $x16, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype, implicit killed $v8_v9_v10_v11_v12_v13_v14_v15, implicit-def $v8_v9_v10_v11_v12_v13_v14_v15
+ ; CHECK-NEXT: $v15 = PseudoVLE32_V_M1 undef $v15, killed $x16, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit killed $v8_v9_v10_v11_v12_v13_v14_v15, implicit-def $v8_v9_v10_v11_v12_v13_v14_v15
; CHECK-NEXT: $v24 = VMV1R_V killed $v8
; CHECK-NEXT: $v25 = VMV1R_V killed $v9
; CHECK-NEXT: $v26 = VMV1R_V killed $v10
@@ -299,9 +299,9 @@
; CHECK-NEXT: $v30 = VMV1R_V killed $v14
; CHECK-NEXT: $v31 = VMV1R_V killed $v15
$x0 = PseudoVSETVLI $x14, 80, implicit-def $vl, implicit-def $vtype
- $v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 killed $x12, $noreg, 5, implicit $vl, implicit $vtype
+ $v8_v9_v10_v11_v12_v13_v14_v15 = PseudoVLSEG8E32_V_M1 undef $v8_v9_v10_v11_v12_v13_v14_v15, killed $x12, $noreg, 5, 0, implicit $vl, implicit $vtype
$x0 = PseudoVSETIVLI 10, 80, implicit-def $vl, implicit-def $vtype
- $v15 = PseudoVLE32_V_M1 killed $x16, $noreg, 5, implicit $vl, implicit $vtype, implicit killed $v8_v9_v10_v11_v12_v13_v14_v15, implicit-def $v8_v9_v10_v11_v12_v13_v14_v15
+ $v15 = PseudoVLE32_V_M1 undef $v15, killed $x16, $noreg, 5, 0, implicit $vl, implicit $vtype, implicit killed $v8_v9_v10_v11_v12_v13_v14_v15, implicit-def $v8_v9_v10_v11_v12_v13_v14_v15
$v24_v25_v26_v27_v28_v29_v30_v31 = COPY killed $v8_v9_v10_v11_v12_v13_v14_v15
...
---
@@ -314,12 +314,12 @@
; CHECK: liveins: $x10, $x11, $v8, $v9
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x0 = PseudoVSETVLI $x10, 201 /* e16, m2, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v10m2 = PseudoVLE16_V_M2 killed $x11, $noreg, 4 /* e16 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $v10m2 = PseudoVLE16_V_M2 undef $v10m2, killed $x11, $noreg, 4 /* e16 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v10 = VMV1R_V $v8
; CHECK-NEXT: $v11 = VMV1R_V $v9
; CHECK-NEXT: $v12m2 = VMV2R_V $v10m2
$x0 = PseudoVSETVLI $x10, 201, implicit-def $vl, implicit-def $vtype
- $v10m2 = PseudoVLE16_V_M2 killed $x11, $noreg, 4, implicit $vl, implicit $vtype
+ $v10m2 = PseudoVLE16_V_M2 undef $v10m2, killed $x11, $noreg, 4, 0, implicit $vl, implicit $vtype
$v10 = COPY $v8
$v11 = COPY $v9
$v12m2 = COPY $v10m2
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -380,18 +380,18 @@
; CHECK-NEXT: vmv8r.v v24, v16
; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
+; CHECK-NEXT: vlm.v v0, (a2)
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a4, a0, a1
-; CHECK-NEXT: vl8r.v v16, (a4)
+; CHECK-NEXT: add a2, a0, a1
+; CHECK-NEXT: vl8r.v v16, (a2)
+; CHECK-NEXT: sub a2, a3, a1
+; CHECK-NEXT: sltu a4, a3, a2
+; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: vl8r.v v8, (a0)
-; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
-; CHECK-NEXT: sub a0, a3, a1
-; CHECK-NEXT: vlm.v v0, (a2)
-; CHECK-NEXT: sltu a2, a3, a0
-; CHECK-NEXT: addi a2, a2, -1
-; CHECK-NEXT: and a0, a2, a0
-; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma
+; CHECK-NEXT: and a2, a4, a2
+; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma
; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0
; CHECK-NEXT: bltu a3, a1, .LBB28_2
; CHECK-NEXT: # %bb.1:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir
@@ -196,8 +196,9 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v8
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY2]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: BEQ [[COPY3]], [[COPY4]], %bb.2
; CHECK-NEXT: PseudoBR %bb.1
@@ -225,7 +226,8 @@
%6:vr = COPY $v8
%5:gpr = COPY $x11
%4:gpr = COPY $x10
- %0:vr = PseudoVLE64_V_M1 %5, %7, 6
+ %pt:vr = IMPLICIT_DEF
+ %0:vr = PseudoVLE64_V_M1 %pt, %5, %7, 6, 0
%8:gpr = COPY $x0
BEQ %4, %8, %bb.2
PseudoBR %bb.1
@@ -275,8 +277,9 @@
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x12
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY3:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 215 /* e32, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 [[COPY2]], $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 %pt, [[COPY2]], $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: BEQ [[COPY3]], [[COPY4]], %bb.2
; CHECK-NEXT: PseudoBR %bb.1
@@ -308,7 +311,8 @@
%6:gpr = COPY $x12
%5:gpr = COPY $x11
%4:gpr = COPY $x10
- %0:vr = PseudoVLE32_V_MF2 %5, %7, 5
+ %pt:vr = IMPLICIT_DEF
+ %0:vr = PseudoVLE32_V_MF2 %pt, %5, %7, 5, 0
%8:gpr = COPY $x0
BEQ %4, %8, %bb.2
PseudoBR %bb.1
@@ -781,7 +785,8 @@
; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY1]], %bb.0, %5, %bb.1
; CHECK-NEXT: [[PHI1:%[0-9]+]]:gpr = PHI [[ADDIW]], %bb.0, %4, %bb.1
; CHECK-NEXT: [[PHI2:%[0-9]+]]:vr = PHI [[COPY3]], %bb.0, %16, %bb.1
- ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 [[PHI]], 4, 5 /* e32 */, implicit $vl, implicit $vtype :: (load (s128) from %ir.lsr.iv12, align 4)
+ ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, [[PHI]], 4, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.lsr.iv12, align 4)
; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE32_V_M1_]], [[PHI2]], 4, 5 /* e32 */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = nsw ADDI [[PHI1]], -4
; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI [[PHI]], 16
@@ -816,7 +821,8 @@
%0:gpr = PHI %6, %bb.0, %5, %bb.1
%1:gpr = PHI %9, %bb.0, %4, %bb.1
%2:vr = PHI %10, %bb.0, %16, %bb.1
- %14:vr = PseudoVLE32_V_M1 %0, 4, 5 :: (load (s128) from %ir.lsr.iv12, align 4)
+ %pt:vr = IMPLICIT_DEF
+ %14:vr = PseudoVLE32_V_M1 %pt, %0, 4, 5, 0 :: (load (s128) from %ir.lsr.iv12, align 4)
%16:vr = PseudoVADD_VV_M1 killed %14, %2, 4, 5
%4:gpr = nsw ADDI %1, -4
%5:gpr = ADDI %0, 16
@@ -956,7 +962,8 @@
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD %src, [[PHI]]
- ; CHECK-NEXT: [[PseudoVLE8_V_MF8_:%[0-9]+]]:vrnov0 = PseudoVLE8_V_MF8 killed [[ADD1]], -1, 3 /* e8 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: %pt2:vrnov0 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[PseudoVLE8_V_MF8_:%[0-9]+]]:vrnov0 = PseudoVLE8_V_MF8 %pt2, killed [[ADD1]], -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl
; CHECK-NEXT: [[PseudoVADD_VI_MF8_:%[0-9]+]]:vrnov0 = PseudoVADD_VI_MF8 [[PseudoVLE8_V_MF8_]], 4, -1, 3 /* e8 */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD %dst, [[PHI]]
@@ -1004,7 +1011,8 @@
successors: %bb.3(0x80000000)
%66:gpr = ADD %src, %26
- %67:vrnov0 = PseudoVLE8_V_MF8 killed %66, -1, 3
+ %pt2:vrnov0 = IMPLICIT_DEF
+ %67:vrnov0 = PseudoVLE8_V_MF8 %pt2, killed %66, -1, 3, 0
%76:vrnov0 = PseudoVADD_VI_MF8 %67, 4, -1, 3
%77:gpr = ADD %dst, %26
PseudoVSE8_V_MF8 killed %76, killed %77, -1, 3
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -160,15 +160,17 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v8
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY2]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]]
; CHECK-NEXT: PseudoRET implicit $v8
%2:gprnox0 = COPY $x11
%1:vr = COPY $v8
%0:gpr = COPY $x10
- %3:vr = PseudoVLE64_V_M1 %0, %2, 6
+ %pt:vr = IMPLICIT_DEF
+ %3:vr = PseudoVLE64_V_M1 %pt, %0, %2, 6, 0
%4:vr = PseudoVADD_VV_M1 killed %3, %1, %2, 6
$v8 = COPY %4
PseudoRET implicit $v8
@@ -198,15 +200,17 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 [[COPY1]], $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVLE32_V_MF2_:%[0-9]+]]:vr = PseudoVLE32_V_MF2 %pt, [[COPY1]], $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: %dead:vr = IMPLICIT_DEF
; CHECK-NEXT: early-clobber %3:vr = PseudoVZEXT_VF2_M1 %dead, killed [[PseudoVLE32_V_MF2_]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $v8 = COPY %3
; CHECK-NEXT: PseudoRET implicit $v8
%1:gprnox0 = COPY $x11
%0:gpr = COPY $x10
- %2:vr = PseudoVLE32_V_MF2 %0, %1, 5
+ %pt:vr = IMPLICIT_DEF
+ %2:vr = PseudoVLE32_V_MF2 %pt, %0, %1, 5, 0
%dead:vr = IMPLICIT_DEF
early-clobber %3:vr = PseudoVZEXT_VF2_M1 %dead, killed %2, %1, 6, 0
$v8 = COPY %3
@@ -268,16 +272,20 @@
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF
+ ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY1]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (load (s128) from %ir.x)
- ; CHECK-NEXT: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (load (s128) from %ir.y)
+ ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.x)
+ ; CHECK-NEXT: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt2, [[COPY]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.y)
; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], killed [[PseudoVLE64_V_M1_1]], 2, 6 /* e64 */, implicit $vl, implicit $vtype
; CHECK-NEXT: PseudoVSE64_V_M1 killed [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (store (s128) into %ir.x)
; CHECK-NEXT: PseudoRET
%1:gpr = COPY $x11
%0:gpr = COPY $x10
- %2:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load (s128) from %ir.x)
- %3:vr = PseudoVLE64_V_M1 %1, 2, 6 :: (load (s128) from %ir.y)
+ %pt:vr = IMPLICIT_DEF
+ %pt2:vr = IMPLICIT_DEF
+ %2:vr = PseudoVLE64_V_M1 %pt, %0, 2, 6, 0 :: (load (s128) from %ir.x)
+ %3:vr = PseudoVLE64_V_M1 %pt2, %1, 2, 6, 0 :: (load (s128) from %ir.y)
%4:vr = PseudoVADD_VV_M1 killed %2, killed %3, 2, 6
PseudoVSE64_V_M1 killed %4, %0, 2, 6 :: (store (s128) into %ir.x)
PseudoRET
@@ -307,8 +315,9 @@
; CHECK: liveins: $x10
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (load (s128) from %ir.x)
+ ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.x)
; CHECK-NEXT: dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 $x0, 152 /* e64, m1, tu, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 undef $v2, 0, -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
@@ -318,7 +327,8 @@
; CHECK-NEXT: $x10 = COPY [[PseudoVMV_X_S_M1_]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gpr = COPY $x10
- %1:vr = PseudoVLE64_V_M1 %0, 2, 6 :: (load (s128) from %ir.x)
+ %pt:vr = IMPLICIT_DEF
+ %1:vr = PseudoVLE64_V_M1 %pt, %0, 2, 6, 0 :: (load (s128) from %ir.x)
%2:vr = PseudoVMV_V_I_M1 undef $v2, 0, -1, 6, 0
%4:vr = IMPLICIT_DEF
%3:vr = PseudoVREDSUM_VS_M1_E8 %4, killed %1, killed %2, 2, 6, 1
@@ -394,8 +404,9 @@
; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x11
; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v8
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10
+ ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 [[COPY2]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY2]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
@@ -404,7 +415,8 @@
%2:gprnox0 = COPY $x11
%1:vr = COPY $v8
%0:gpr = COPY $x10
- %3:vr = PseudoVLE64_V_M1 %0, %2, 6
+ %pt:vr = IMPLICIT_DEF
+ %3:vr = PseudoVLE64_V_M1 %pt, %0, %2, 6, 0
INLINEASM &"", 1 /* sideeffect attdialect */
%4:vr = PseudoVADD_VV_M1 killed %3, %1, %2, 6
$v8 = COPY %4
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -27,8 +27,8 @@
; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 3
; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12
; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22
- ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
- ; CHECK-NEXT: $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 renamable $x10, $noreg, 6 /* e64 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 152 /* e64, m1, tu, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 undef $v0_v1_v2_v3_v4_v5_v6, renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x11 = ADDI $x2, 16
; CHECK-NEXT: $x12 = PseudoReadVLENB
; CHECK-NEXT: VS1R_V $v0, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store unknown-size into %stack.0, align 8)
@@ -67,7 +67,7 @@
; CHECK-NEXT: PseudoRET
%0:gpr = COPY $x10
%1:gprnox0 = COPY $x11
- $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 %0, %1, 6
+ $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 undef $v0_v1_v2_v3_v4_v5_v6, %0, %1, 6, 0
PseudoVSPILL7_M1 killed renamable $v0_v1_v2_v3_v4_v5_v6, %stack.0 :: (store unknown-size into %stack.0, align 8)
renamable $v7_v8_v9_v10_v11_v12_v13 = PseudoVRELOAD7_M1 %stack.0 :: (load unknown-size from %stack.0, align 8)
VS1R_V killed $v8, %0:gpr