Index: llvm/lib/Target/RISCV/RISCVInstrInfo.cpp =================================================================== --- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2540,10 +2540,12 @@ MachineBasicBlock &MBB = *MI.getParent(); MachineInstrBuilder MIB = BuildMI(MBB, MI, MI.getDebugLoc(), get(NewOpc)) .add(MI.getOperand(0)) + .addReg(MI.getOperand(0).getReg(), RegState::Undef) .add(MI.getOperand(1)) .add(MI.getOperand(2)) .add(MI.getOperand(3)) - .add(MI.getOperand(4)); + .add(MI.getOperand(4)) + .add(MI.getOperand(5)); MIB.copyImplicitOps(MI); if (LV) { Index: llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td =================================================================== --- llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td +++ llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td @@ -1110,21 +1110,6 @@ VReg Op1Class, DAGOperand Op2Class, string Constraint> : - Pseudo<(outs RetClass:$rd), - (ins Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm, AVL:$vl, ixlenimm:$sew), []>, - RISCVVPseudo { - let mayLoad = 0; - let mayStore = 0; - let Constraints = Constraint; - let HasVLOp = 1; - let HasSEWOp = 1; - let HasRoundModeOp = 1; -} - -class VPseudoBinaryNoMaskTURoundingMode : Pseudo<(outs RetClass:$rd), (ins RetClass:$merge, Op1Class:$rs2, Op2Class:$rs1, ixlenimm:$rm, AVL:$vl, ixlenimm:$sew, ixlenimm:$policy), []>, @@ -1259,6 +1244,22 @@ let HasVecPolicyOp = 1; } +// Like VPseudoBinaryNoMask, but output can be V0. +class VPseudoBinaryMOutNoMask : + Pseudo<(outs RetClass:$rd), + (ins Op1Class:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>, + RISCVVPseudo { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = Constraint; + let HasVLOp = 1; + let HasSEWOp = 1; +} + // Like VPseudoBinaryMask, but output can be V0. class VPseudoBinaryMOutMask { let VLMul = MInfo.value in { defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); - def suffix : VPseudoBinaryNoMask; - def suffix # "_TU" : VPseudoBinaryNoMaskTU; + def suffix : VPseudoBinaryNoMaskTU; def suffix # "_MASK" : VPseudoBinaryMaskPolicy, - RISCVMaskedPseudo; + RISCVMaskedPseudo; } } @@ -1893,14 +1893,13 @@ defvar suffix = !if(sew, "_" # MInfo.MX # "_E" # sew, "_" # MInfo.MX); def suffix : VPseudoBinaryNoMaskRoundingMode; - def suffix # "_TU" :VPseudoBinaryNoMaskTURoundingMode; def suffix # "_MASK" : VPseudoBinaryMaskPolicyRoundingMode, - RISCVMaskedPseudo; + RISCVMaskedPseudo; } } @@ -1910,8 +1909,8 @@ LMULInfo MInfo, string Constraint = ""> { let VLMul = MInfo.value in { - def "_" # MInfo.MX : VPseudoBinaryNoMask; + def "_" # MInfo.MX : VPseudoBinaryMOutNoMask; let ForceTailAgnostic = true in def "_" # MInfo.MX # "_MASK" : VPseudoBinaryMOutMask, @@ -1928,13 +1927,13 @@ int sew = 0> { let VLMul = lmul.value in { defvar suffix = !if(sew, "_" # lmul.MX # "_E" # sew, "_" # lmul.MX); - def suffix # "_" # emul.MX : VPseudoBinaryNoMask; - def suffix # "_" # emul.MX # "_TU": VPseudoBinaryNoMaskTU; + def suffix # "_" # emul.MX : VPseudoBinaryNoMaskTU; def suffix # "_" # emul.MX # "_MASK" : VPseudoBinaryMaskPolicy, - RISCVMaskedPseudo; + RISCVMaskedPseudo; } } @@ -3893,24 +3892,6 @@ (op2_type op2_kind:$rs2), GPR:$vl, sew)>; -class VPatBinaryNoMask : - Pat<(result_type (!cast(intrinsic_name) - (result_type (undef)), - (op1_type op1_reg_class:$rs1), - (op2_type op2_kind:$rs2), - VLOpFrag)), - (!cast(inst) - (op1_type op1_reg_class:$rs1), - (op2_type op2_kind:$rs2), - GPR:$vl, sew)>; - class VPatBinaryNoMaskTU(inst#"_TU") + (!cast(inst) (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), @@ -3946,10 +3927,11 @@ (XLenVT timm:$round), VLOpFrag)), (!cast(inst) + (result_type (IMPLICIT_DEF)), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), (XLenVT timm:$round), - GPR:$vl, sew)>; + GPR:$vl, sew, TA_MA)>; class VPatBinaryNoMaskTURoundingMode(inst#"_TU") + (!cast(inst) (result_type result_reg_class:$merge), (op1_type op1_reg_class:$rs1), (op2_type op2_kind:$rs2), @@ -4405,8 +4387,6 @@ VReg result_reg_class, VReg op1_reg_class, DAGOperand op2_kind> { - def : VPatBinaryNoMask; def : VPatBinaryNoMaskTU; def : VPatBinaryMaskTA.Predicates in { - def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector (undef)), - (vti.Vector vti.RegClass:$rs2), - (vti.Vector vti.RegClass:$rs1), - VLOpFrag)), - (!cast("PseudoVSUB_VV_"#vti.LMul.MX) vti.RegClass:$rs1, - vti.RegClass:$rs2, - GPR:$vl, - vti.Log2SEW)>; def : Pat<(vti.Vector (int_riscv_vrsub (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), VLOpFrag)), - (!cast("PseudoVSUB_VV_"#vti.LMul.MX#"_TU") + (!cast("PseudoVSUB_VV_"#vti.LMul.MX) vti.RegClass:$merge, vti.RegClass:$rs1, vti.RegClass:$rs2, @@ -5570,10 +5542,11 @@ (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), VLOpFrag)), - (!cast("PseudoVADD_VI_"#vti.LMul.MX) vti.RegClass:$rs1, + (!cast("PseudoVADD_VI_"#vti.LMul.MX) (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, (NegImm simm5_plus1:$rs2), GPR:$vl, - vti.Log2SEW)>; + vti.Log2SEW, TU_MU)>; def : Pat<(vti.Vector (int_riscv_vsub_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs1), (vti.Scalar simm5_plus1:$rs2), @@ -6232,10 +6205,9 @@ def : Pat<(vti.Vector (int_riscv_vsll (vti.Vector undef), (vti.Vector vti.RegClass:$rs1), (XLenVT 1), VLOpFrag)), - (!cast("PseudoVADD_VV_"#vti.LMul.MX) vti.RegClass:$rs1, - vti.RegClass:$rs1, - GPR:$vl, - vti.Log2SEW)>; + (!cast("PseudoVADD_VV_"#vti.LMul.MX) + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, + vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; def : Pat<(vti.Vector (int_riscv_vsll_mask (vti.Vector vti.RegClass:$merge), (vti.Vector vti.RegClass:$rs1), (XLenVT 1), Index: llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td =================================================================== --- llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td +++ llvm/lib/Target/RISCV/RISCVInstrInfoVSDPatterns.td @@ -85,8 +85,8 @@ (op_type op_reg_class:$rs2))), (!cast( !if(isSEWAware, - instruction_name#"_VV_"# vlmul.MX#"_E"#!shl(1, log2sew)#"_TU", - instruction_name#"_VV_"# vlmul.MX#"_TU")) + instruction_name#"_VV_"# vlmul.MX#"_E"#!shl(1, log2sew), + instruction_name#"_VV_"# vlmul.MX)) (result_type (IMPLICIT_DEF)), op_reg_class:$rs1, op_reg_class:$rs2, @@ -109,8 +109,8 @@ (vop_type (SplatPatKind (XLenVT xop_kind:$rs2))))), (!cast( !if(isSEWAware, - instruction_name#_#suffix#_# vlmul.MX#"_E"#!shl(1, log2sew)#"_TU", - instruction_name#_#suffix#_# vlmul.MX#"_TU")) + instruction_name#_#suffix#_# vlmul.MX#"_E"#!shl(1, log2sew), + instruction_name#_#suffix#_# vlmul.MX)) (result_type (IMPLICIT_DEF)), vop_reg_class:$rs1, xop_kind:$rs2, @@ -160,8 +160,8 @@ (vop_type (SplatFPOp xop_kind:$rs2)))), (!cast( !if(isSEWAware, - instruction_name#"_"#vlmul.MX#"_E"#!shl(1, log2sew)#"_TU", - instruction_name#"_"#vlmul.MX#"_TU")) + instruction_name#"_"#vlmul.MX#"_E"#!shl(1, log2sew), + instruction_name#"_"#vlmul.MX)) (result_type (IMPLICIT_DEF)), vop_reg_class:$rs1, (xop_type xop_kind:$rs2), @@ -190,8 +190,8 @@ (fvti.Vector fvti.RegClass:$rs1))), (!cast( !if(isSEWAware, - instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW#"_TU", - instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_TU")) + instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX#"_E"#fvti.SEW, + instruction_name#"_V"#fvti.ScalarSuffix#"_"#fvti.LMul.MX)) (fvti.Vector (IMPLICIT_DEF)), fvti.RegClass:$rs1, (fvti.Scalar fvti.ScalarRegClass:$rs2), @@ -412,11 +412,13 @@ def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))), (wti.Vector (extop2 (vti.Vector vti.RegClass:$rs1)))), (!cast(instruction_name#"_VV_"#vti.LMul.MX) - vti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; + (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, + vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; def : Pat<(op (wti.Vector (extop1 (vti.Vector vti.RegClass:$rs2))), (wti.Vector (extop2 (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))), (!cast(instruction_name#"_VX_"#vti.LMul.MX) - vti.RegClass:$rs2, GPR:$rs1, vti.AVL, vti.Log2SEW)>; + (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, + GPR:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; } } } @@ -436,7 +438,8 @@ def : Pat<(op (wti.Vector wti.RegClass:$rs2), (wti.Vector (extop (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))), (!cast(instruction_name#"_WX_"#vti.LMul.MX) - wti.RegClass:$rs2, GPR:$rs1, vti.AVL, vti.Log2SEW)>; + (wti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, GPR:$rs1, + vti.AVL, vti.Log2SEW, TU_MU)>; } } } @@ -492,7 +495,8 @@ (vti.Vector vti.RegClass:$rs1), (vti.Mask true_mask), (XLenVT srcvalue)))), (!cast(instruction_name#"_VV_"#vti.LMul.MX) - vti.RegClass:$rs2, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; + (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, + vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), (vti.Mask true_mask), (XLenVT srcvalue))), @@ -500,13 +504,15 @@ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), (vti.Mask true_mask), (XLenVT srcvalue)))), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) - vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; + (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, + vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; def : Pat<(op (wti.Vector (riscv_fpextend_vl_oneuse (vti.Vector vti.RegClass:$rs2), (vti.Mask true_mask), (XLenVT srcvalue))), (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))), (!cast(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) - vti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; + (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs2, + vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; } } } @@ -529,11 +535,13 @@ (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), (vti.Mask true_mask), (XLenVT srcvalue)))), (!cast(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX) - wti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; + (wti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, + vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; def : Pat<(op (wti.Vector wti.RegClass:$rs2), (wti.Vector (SplatFPOp (fpext_oneuse vti.ScalarRegClass:$rs1)))), (!cast(instruction_name#"_W"#vti.ScalarSuffix#"_"#vti.LMul.MX) - wti.RegClass:$rs2, vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW)>; + (wti.Vector (IMPLICIT_DEF)), wti.RegClass:$rs2, + vti.ScalarRegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; } } } @@ -718,15 +726,21 @@ // Handle VRSUB specially since it's the only integer binary op with reversed // pattern operands foreach vti = AllIntegerVectors in { + // The AddedComplexity here is covering up a missing matcher for + // widening vwsub.vx which can recognize a extended folded into the + // scalar of the splat. + let AddedComplexity = 20 in let Predicates = GetVTypePredicates.Predicates in { def : Pat<(sub (vti.Vector (SplatPat (XLenVT GPR:$rs2))), (vti.Vector vti.RegClass:$rs1)), (!cast("PseudoVRSUB_VX_"# vti.LMul.MX) - vti.RegClass:$rs1, GPR:$rs2, vti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, GPR:$rs2, + vti.AVL, vti.Log2SEW, TU_MU)>; def : Pat<(sub (vti.Vector (SplatPat_simm5 simm5:$rs2)), (vti.Vector vti.RegClass:$rs1)), (!cast("PseudoVRSUB_VI_"# vti.LMul.MX) - vti.RegClass:$rs1, simm5:$rs2, vti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, + simm5:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>; } } @@ -748,15 +762,18 @@ def : Pat<(shl (wti.Vector (sext_oneuse (vti.Vector vti.RegClass:$rs1))), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))), (!cast("PseudoVWADD_VV_"#vti.LMul.MX) - vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; + (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, + vti.AVL, vti.Log2SEW, TU_MU)>; def : Pat<(shl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs1))), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))), (!cast("PseudoVWADDU_VV_"#vti.LMul.MX) - vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; + (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, + vti.AVL, vti.Log2SEW, TU_MU)>; def : Pat<(shl (wti.Vector (anyext_oneuse (vti.Vector vti.RegClass:$rs1))), (wti.Vector (riscv_vmv_v_x_vl (wti.Vector undef), 1, (XLenVT srcvalue)))), (!cast("PseudoVWADDU_VV_"#vti.LMul.MX) - vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; + (wti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, vti.RegClass:$rs1, + vti.AVL, vti.Log2SEW, TU_MU)>; } } @@ -790,7 +807,8 @@ def : Pat<(shl (vti.Vector vti.RegClass:$rs1), (vti.Vector (riscv_vmv_v_x_vl (vti.Vector undef), 1, (XLenVT srcvalue)))), (!cast("PseudoVADD_VV_"# vti.LMul.MX) - vti.RegClass:$rs1, vti.RegClass:$rs1, vti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), vti.RegClass:$rs1, + vti.RegClass:$rs1, vti.AVL, vti.Log2SEW, TU_MU)>; } @@ -1051,29 +1069,35 @@ // 13.12. Vector Floating-Point Sign-Injection Instructions def : Pat<(fabs (vti.Vector vti.RegClass:$rs)), (!cast("PseudoVFSGNJX_VV_"# vti.LMul.MX) - vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TU_MU)>; // Handle fneg with VFSGNJN using the same input for both operands. def : Pat<(fneg (vti.Vector vti.RegClass:$rs)), (!cast("PseudoVFSGNJN_VV_"# vti.LMul.MX) - vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs, vti.RegClass:$rs, vti.AVL, vti.Log2SEW, TU_MU)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector vti.RegClass:$rs2))), (!cast("PseudoVFSGNJ_VV_"# vti.LMul.MX) - vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs2)))), (!cast("PseudoVFSGNJ_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) - vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (fneg vti.RegClass:$rs2)))), (!cast("PseudoVFSGNJN_VV_"# vti.LMul.MX) - vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.RegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>; def : Pat<(vti.Vector (fcopysign (vti.Vector vti.RegClass:$rs1), (vti.Vector (fneg (SplatFPOp vti.ScalarRegClass:$rs2))))), (!cast("PseudoVFSGNJN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) - vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, vti.AVL, vti.Log2SEW, TU_MU)>; } } Index: llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td =================================================================== --- llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -1200,7 +1200,8 @@ (vti.Mask true_mask), VLOpFrag)), (!cast(instruction_name#"_WV_"#vti.LMul.MX) - wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; def : Pat< (vti.Vector (riscv_trunc_vector_vl @@ -1209,7 +1210,8 @@ (vti.Mask true_mask), VLOpFrag)), (!cast(instruction_name#"_WX_"#vti.LMul.MX) - wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; } } } @@ -1229,7 +1231,8 @@ (wti.Vector (SplatPat_uimm5 uimm5:$rs1))), (vti.Mask true_mask), VLOpFrag)), (!cast(instruction_name#"_WI_"#vti.LMul.MX) - wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + wti.RegClass:$rs2, uimm5:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; } } @@ -1342,7 +1345,8 @@ srcvalue, (wti.Mask true_mask), VLOpFrag), (vti.Mask true_mask), VLOpFrag)), (!cast(instruction_name#"_WX_"#vti.LMul.MX) - wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + wti.RegClass:$rs2, GPR:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; } } @@ -1455,12 +1459,14 @@ (wti.Vector (op wti.RegClass:$rs1, (SplatPat XLenVT:$rs2), srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)), (!cast(instruction_name#"_WX_"#vti.LMul.MX) - wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + wti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; def : Pat<(vti.Vector (riscv_trunc_vector_vl (wti.Vector (op wti.RegClass:$rs1, (SplatPat_uimm5 uimm5:$rs2), srcvalue, true_mask, VLOpFrag)), true_mask, VLOpFrag)), (!cast(instruction_name#"_WI_"#vti.LMul.MX) - wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + wti.RegClass:$rs1, uimm5:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; } } } @@ -1612,7 +1618,8 @@ (riscv_vmv_v_x_vl (vti.Vector undef), 1, (XLenVT srcvalue)), srcvalue, (vti.Mask true_mask), VLOpFrag), (!cast("PseudoVADD_VV_"# vti.LMul.MX) - vti.RegClass:$rs1, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>; } // 11.7. Vector Narrowing Integer Right Shift Instructions @@ -1913,7 +1920,8 @@ (vti.Mask true_mask), VLOpFrag), (!cast("PseudoVFSGNJN_VV_"# vti.LMul.MX) - vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>; + (vti.Vector (IMPLICIT_DEF)), + vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; def : Pat<(riscv_fcopysign_vl (vti.Vector vti.RegClass:$rs1), (SplatFPOp vti.ScalarRegClass:$rs2), @@ -2386,59 +2394,34 @@ (!cast("PseudoVID_V_"#vti.LMul.MX#"_MASK") (vti.Vector (IMPLICIT_DEF)), (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - - def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector undef), - (vti.Vector vti.RegClass:$rs1), - GPR:$rs2, (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVSLIDE1UP_VX_"#vti.LMul.MX) - vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_slide1up_vl (vti.Vector vti.RegClass:$rd), (vti.Vector vti.RegClass:$rs1), GPR:$rs2, (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVSLIDE1UP_VX_"#vti.LMul.MX#"_TU") + (!cast("PseudoVSLIDE1UP_VX_"#vti.LMul.MX) vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector undef), - (vti.Vector vti.RegClass:$rs1), - GPR:$rs2, (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX) - vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_slide1down_vl (vti.Vector vti.RegClass:$rd), (vti.Vector vti.RegClass:$rs1), GPR:$rs2, (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX#"_TU") + (!cast("PseudoVSLIDE1DOWN_VX_"#vti.LMul.MX) vti.RegClass:$rd, vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; } } foreach vti = AllFloatVectors in { let Predicates = GetVTypePredicates.Predicates in { - def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector undef), - (vti.Vector vti.RegClass:$rs1), - vti.Scalar:$rs2, (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) - vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector vti.RegClass:$rd), (vti.Vector vti.RegClass:$rs1), vti.Scalar:$rs2, (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_TU") + (!cast("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector undef), - (vti.Vector vti.RegClass:$rs1), - vti.Scalar:$rs2, (vti.Mask true_mask), - VLOpFrag)), - (!cast("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) - vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>; def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector vti.RegClass:$rd), (vti.Vector vti.RegClass:$rs1), vti.Scalar:$rs2, (vti.Mask true_mask), VLOpFrag)), - (!cast("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_TU") + (!cast("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX) vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW, TU_MU)>; } } Index: llvm/test/CodeGen/RISCV/double_reduct.ll =================================================================== --- llvm/test/CodeGen/RISCV/double_reduct.ll +++ llvm/test/CodeGen/RISCV/double_reduct.ll @@ -44,11 +44,11 @@ define float @fmin_f32(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: fmin_f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 523264 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vfmin.vv v8, v8, v9 -; CHECK-NEXT: vfredmin.vs v8, v8, v10 +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vfredmin.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> %a) @@ -60,11 +60,11 @@ define float @fmax_f32(<4 x float> %a, <4 x float> %b) { ; CHECK-LABEL: fmax_f32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 1047552 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vfmax.vv v8, v8, v9 -; CHECK-NEXT: vfredmax.vs v8, v8, v10 +; CHECK-NEXT: lui a0, 1047552 +; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vfredmax.vs v8, v8, v9 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %r1 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a) @@ -78,9 +78,9 @@ ; CHECK-LABEL: add_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vadd.vv v8, v8, v9 -; CHECK-NEXT: vredsum.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vredsum.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r1 = call i32 @llvm.vector.reduce.add.i32.v4i32(<4 x i32> %a) @@ -92,12 +92,11 @@ define i16 @add_ext_i16(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: add_ext_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 16, e16, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vwaddu.vv v12, v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vredsum.vs v8, v12, v10 +; CHECK-NEXT: vwaddu.vv v10, v8, v9 +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vredsum.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %ae = zext <16 x i8> %a to <16 x i16> @@ -190,9 +189,9 @@ ; CHECK-LABEL: or_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vor.vv v8, v8, v9 -; CHECK-NEXT: vredor.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vredor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r1 = call i32 @llvm.vector.reduce.or.i32.v4i32(<4 x i32> %a) @@ -205,9 +204,9 @@ ; CHECK-LABEL: xor_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vxor.vv v8, v8, v9 -; CHECK-NEXT: vredxor.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vredxor.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r1 = call i32 @llvm.vector.reduce.xor.i32.v4i32(<4 x i32> %a) @@ -235,9 +234,9 @@ ; CHECK-LABEL: umax_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, zero ; CHECK-NEXT: vmaxu.vv v8, v8, v9 -; CHECK-NEXT: vredmaxu.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v9, zero +; CHECK-NEXT: vredmaxu.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r1 = call i32 @llvm.vector.reduce.umax.i32.v4i32(<4 x i32> %a) @@ -277,11 +276,11 @@ define i32 @smax_i32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: smax_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 524288 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vmax.vv v8, v8, v9 -; CHECK-NEXT: vredmax.vs v8, v8, v10 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vredmax.vs v8, v8, v9 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %r1 = call i32 @llvm.vector.reduce.smax.i32.v4i32(<4 x i32> %a) Index: llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll =================================================================== --- llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll +++ llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll @@ -64,10 +64,10 @@ ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vfwsub.wv v16, v8, v24 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vfwsub.wv v8, v16, v24 ; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu ; CHECK-NEXT: vssubu.vv v4, v4, v8, v0.t ; CHECK-NEXT: vsetvli zero, s0, e32, m8, tu, mu Index: llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -103,15 +103,15 @@ define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vsaddu.vx v8, v8, a1 -; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: ret @@ -122,15 +122,15 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_0) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vsaddu.vx v8, v8, a1 -; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI9_1) @@ -157,15 +157,15 @@ define <128 x i1> @fv128(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv128: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI10_0) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vid.v v16 +; CHECK-NEXT: vsaddu.vx v16, v16, a1 +; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vsaddu.vx v8, v8, a1 -; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: lui a0, %hi(.LCPI10_1) Index: llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -2139,26 +2139,26 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: mv a4, sp -; RV32-NEXT: vsetvli a5, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a4), zero -; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v10, v11 -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vx v10, v10, a4 +; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: mv a5, sp +; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v12, (a5), zero +; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; RV32-NEXT: vand.vv v11, v11, v12 +; RV32-NEXT: vor.vv v10, v11, v10 ; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vsll.vx v10, v8, a1 -; RV32-NEXT: vand.vx v12, v8, a3 -; RV32-NEXT: vsll.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a4 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vx v11, v8, a3 +; RV32-NEXT: vsll.vx v11, v11, a2 +; RV32-NEXT: vor.vv v10, v10, v11 +; RV32-NEXT: vand.vx v11, v8, a4 +; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 @@ -2431,26 +2431,26 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: mv a4, sp -; RV32-NEXT: vsetvli a5, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a4), zero -; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v12, v14 -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: mv a5, sp +; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma +; RV32-NEXT: vand.vv v14, v14, v16 +; RV32-NEXT: vor.vv v12, v14, v12 ; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v16, v8, a3 -; RV32-NEXT: vsll.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a3 +; RV32-NEXT: vsll.vx v14, v14, a2 +; RV32-NEXT: vor.vv v12, v12, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v14, v8 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 4 @@ -2723,26 +2723,26 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: mv a4, sp -; RV32-NEXT: vsetvli a5, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a4), zero -; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v16, v20 -; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v24, a4 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: vsrl.vi v20, v8, 8 +; RV32-NEXT: mv a5, sp +; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma +; RV32-NEXT: vand.vv v20, v20, v24 +; RV32-NEXT: vor.vv v16, v20, v16 ; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vand.vx v24, v8, a3 -; RV32-NEXT: vsll.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vand.vx v24, v8, a4 -; RV32-NEXT: vsll.vi v24, v24, 24 -; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vand.vx v20, v8, a3 +; RV32-NEXT: vsll.vx v20, v20, a2 +; RV32-NEXT: vor.vv v16, v16, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v20, v8 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 4 @@ -3085,30 +3085,30 @@ ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v0, v16 -; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vand.vx v24, v24, a4 -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v0, v24, a4 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 @@ -3455,30 +3455,30 @@ ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v0, v16 -; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vand.vx v24, v24, a4 -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v0, v24, a4 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: addi a5, sp, 48 ; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 Index: llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -649,26 +649,26 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v10, v10, a3 ; RV32-NEXT: vor.vv v9, v10, v9 -; RV32-NEXT: vsrl.vi v10, v8, 8 -; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: vsetvli a5, zero, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v11, (a4), zero +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v10, v10, a4 +; RV32-NEXT: vsrl.vi v11, v8, 8 +; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma +; RV32-NEXT: vlse64.v v12, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vand.vv v10, v10, v11 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vand.vv v11, v11, v12 +; RV32-NEXT: vor.vv v10, v11, v10 ; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: vsll.vx v10, v8, a1 -; RV32-NEXT: vand.vx v12, v8, a3 -; RV32-NEXT: vsll.vx v12, v12, a2 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsll.vi v12, v12, 24 -; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vx v11, v8, a3 +; RV32-NEXT: vsll.vx v11, v11, a2 +; RV32-NEXT: vor.vv v10, v10, v11 +; RV32-NEXT: vand.vx v11, v8, a4 +; RV32-NEXT: vsll.vi v11, v11, 24 +; RV32-NEXT: vand.vv v8, v8, v12 ; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 @@ -809,26 +809,26 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: vsetvli a5, zero, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v14, (a4), zero +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: vsetvli a6, zero, e64, m2, ta, ma +; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v12, v14 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vv v14, v14, v16 +; RV32-NEXT: vor.vv v12, v14, v12 ; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v16, v8, a3 -; RV32-NEXT: vsll.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a3 +; RV32-NEXT: vsll.vx v14, v14, a2 +; RV32-NEXT: vor.vv v12, v12, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v14, v8 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: addi sp, sp, 16 @@ -969,26 +969,26 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v16, v12 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: vsetvli a5, zero, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v20, (a4), zero +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: vsrl.vi v20, v8, 8 +; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma +; RV32-NEXT: vlse64.v v24, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v16, v16, v20 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v24, v24, a0 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vand.vv v20, v20, v24 +; RV32-NEXT: vor.vv v16, v20, v16 ; RV32-NEXT: vor.vv v12, v16, v12 ; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vand.vx v24, v8, a3 -; RV32-NEXT: vsll.vx v24, v24, a2 -; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: vand.vx v24, v8, a0 -; RV32-NEXT: vsll.vi v24, v24, 24 -; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vand.vx v20, v8, a3 +; RV32-NEXT: vsll.vx v20, v20, a2 +; RV32-NEXT: vor.vv v16, v16, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vor.vv v8, v20, v8 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: addi sp, sp, 16 @@ -1199,31 +1199,30 @@ ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v0, v16 -; RV32-NEXT: lui a0, 4080 ; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vand.vx v24, v24, a0 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v0, v24, a4 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vand.vx v8, v8, a0 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb @@ -1437,31 +1436,30 @@ ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: addi a4, sp, 8 -; RV32-NEXT: vsetvli a5, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a4), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v0, v16 -; RV32-NEXT: lui a0, 4080 ; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: vand.vx v24, v24, a0 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v0, v24, a4 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a2 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v24, v0 -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vand.vx v8, v8, a0 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vor.vv v16, v16, v0 +; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb Index: llvm/test/CodeGen/RISCV/rvv/commuted-op-indices-regression.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/commuted-op-indices-regression.mir +++ llvm/test/CodeGen/RISCV/rvv/commuted-op-indices-regression.mir @@ -32,7 +32,7 @@ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vrnov0 = COPY $v2 ; CHECK-NEXT: [[PseudoVNMSUB_VV_M1_:%[0-9]+]]:vr = PseudoVNMSUB_VV_M1 [[PseudoVNMSUB_VV_M1_]], [[COPY1]], [[COPY2]], -1, 6 /* e64 */, 1 /* ta, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY [[PseudoVNMSUB_VV_M1_]] - ; CHECK-NEXT: dead [[COPY2]]:vr = PseudoVSLL_VI_M1 [[COPY2]], 11, $noreg, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: dead [[PseudoVSLL_VI_M1_:%[0-9]+]]:vr = PseudoVSLL_VI_M1 undef [[PseudoVSLL_VI_M1_]], [[PseudoVSLL_VI_M1_]], 11, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v0 = COPY [[PseudoVNMSUB_VV_M1_]] ; CHECK-NEXT: PseudoRET implicit $v0 %0:vr = COPY $v0 @@ -40,7 +40,7 @@ %2:vrnov0 = COPY $v2 %0:vr = PseudoVNMSUB_VV_M1 %0, %1, killed %2, -1, 6, 1, implicit $vl, implicit $vtype %3:vr = COPY %0 - %3:vr = PseudoVSLL_VI_M1 %3, 11, $noreg, 6, implicit $vl, implicit $vtype + %3:vr = PseudoVSLL_VI_M1 undef %3, %3, 11, $noreg, 6, 0, implicit $vl, implicit $vtype $v0 = COPY %0 PseudoRET implicit $v0 ... Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-deinterleave-load.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vector-deinterleave-load.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-deinterleave-load.ll @@ -9,29 +9,29 @@ ; RV32: # %bb.0: ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV32-NEXT: vlm.v v8, (a0) -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v8, 2 +; RV32-NEXT: vlm.v v0, (a0) ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vmerge.vim v10, v9, 1, v0 -; RV32-NEXT: vmv1r.v v0, v8 -; RV32-NEXT: vmerge.vim v8, v9, 1, v0 +; RV32-NEXT: vmv.v.i v8, 0 +; RV32-NEXT: vmerge.vim v10, v8, 1, v0 ; RV32-NEXT: vid.v v9 ; RV32-NEXT: vadd.vv v11, v9, v9 -; RV32-NEXT: vrgather.vv v9, v8, v11 +; RV32-NEXT: vrgather.vv v9, v10, v11 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV32-NEXT: vmerge.vim v8, v8, 1, v0 +; RV32-NEXT: vadd.vi v12, v11, -16 ; RV32-NEXT: lui a0, 16 ; RV32-NEXT: addi a0, a0, -256 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV32-NEXT: vadd.vi v12, v11, -16 -; RV32-NEXT: vrgather.vv v9, v10, v12, v0.t +; RV32-NEXT: vrgather.vv v9, v8, v12, v0.t ; RV32-NEXT: vmsne.vi v9, v9, 0 ; RV32-NEXT: vadd.vi v12, v11, 1 -; RV32-NEXT: vrgather.vv v13, v8, v12 -; RV32-NEXT: vadd.vi v8, v11, -15 -; RV32-NEXT: vrgather.vv v13, v10, v8, v0.t +; RV32-NEXT: vrgather.vv v13, v10, v12 +; RV32-NEXT: vadd.vi v10, v11, -15 +; RV32-NEXT: vrgather.vv v13, v8, v10, v0.t ; RV32-NEXT: vmsne.vi v8, v13, 0 ; RV32-NEXT: vmv.v.v v0, v9 ; RV32-NEXT: ret @@ -40,29 +40,29 @@ ; RV64: # %bb.0: ; RV64-NEXT: li a1, 32 ; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; RV64-NEXT: vlm.v v8, (a0) -; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vi v0, v8, 2 +; RV64-NEXT: vlm.v v0, (a0) ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: vmerge.vim v10, v9, 1, v0 -; RV64-NEXT: vmv1r.v v0, v8 -; RV64-NEXT: vmerge.vim v8, v9, 1, v0 +; RV64-NEXT: vmv.v.i v8, 0 +; RV64-NEXT: vmerge.vim v10, v8, 1, v0 ; RV64-NEXT: vid.v v9 ; RV64-NEXT: vadd.vv v11, v9, v9 -; RV64-NEXT: vrgather.vv v9, v8, v11 +; RV64-NEXT: vrgather.vv v9, v10, v11 +; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vi v0, v0, 2 +; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; RV64-NEXT: vmerge.vim v8, v8, 1, v0 +; RV64-NEXT: vadd.vi v12, v11, -16 ; RV64-NEXT: lui a0, 16 ; RV64-NEXT: addiw a0, a0, -256 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64-NEXT: vadd.vi v12, v11, -16 -; RV64-NEXT: vrgather.vv v9, v10, v12, v0.t +; RV64-NEXT: vrgather.vv v9, v8, v12, v0.t ; RV64-NEXT: vmsne.vi v9, v9, 0 ; RV64-NEXT: vadd.vi v12, v11, 1 -; RV64-NEXT: vrgather.vv v13, v8, v12 -; RV64-NEXT: vadd.vi v8, v11, -15 -; RV64-NEXT: vrgather.vv v13, v10, v8, v0.t +; RV64-NEXT: vrgather.vv v13, v10, v12 +; RV64-NEXT: vadd.vi v10, v11, -15 +; RV64-NEXT: vrgather.vv v13, v8, v10, v0.t ; RV64-NEXT: vmsne.vi v8, v13, 0 ; RV64-NEXT: vmv.v.v v0, v9 ; RV64-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-transpose.ll @@ -42,12 +42,12 @@ ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vid.v v11 ; RV32-NEXT: vrgather.vv v10, v8, v11 +; RV32-NEXT: vadd.vi v8, v11, -1 ; RV32-NEXT: lui a0, 11 ; RV32-NEXT: addi a0, a0, -1366 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV32-NEXT: vadd.vi v8, v11, -1 ; RV32-NEXT: vrgather.vv v10, v9, v8, v0.t ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret @@ -57,12 +57,12 @@ ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vid.v v11 ; RV64-NEXT: vrgather.vv v10, v8, v11 +; RV64-NEXT: vadd.vi v8, v11, -1 ; RV64-NEXT: lui a0, 11 ; RV64-NEXT: addiw a0, a0, -1366 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64-NEXT: vadd.vi v8, v11, -1 ; RV64-NEXT: vrgather.vv v10, v9, v8, v0.t ; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1520,37 +1520,37 @@ ; RV32: # %bb.0: ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v10, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsll.vx v9, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v10, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v10, v8, a4 +; RV32-NEXT: vsll.vi v10, v10, 24 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: vmv.v.i v11, 0 ; RV32-NEXT: vmv.v.i v0, 5 -; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v10, v10, a4, v0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v11, v8, 8 -; RV32-NEXT: vand.vv v11, v11, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vor.vv v11, v11, v12 -; RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vsll.vx v11, v8, a1 -; RV32-NEXT: vand.vx v12, v8, a3 -; RV32-NEXT: vsll.vx v12, v12, a2 -; RV32-NEXT: vor.vv v11, v11, v12 -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vsrl.vx v12, v8, a3 +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: vand.vv v11, v12, v11 +; RV32-NEXT: vsrl.vi v8, v8, 24 ; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vor.vv v8, v11, v8 -; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 @@ -1797,30 +1797,30 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: li a4, 85 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: li a5, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.x v0, a4 +; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v14, 0 -; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v14, v14, a4, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v12, v14 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vv v14, v14, v16 +; RV32-NEXT: vor.vv v12, v14, v12 ; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v16, v8, a3 -; RV32-NEXT: vsll.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a3 +; RV32-NEXT: vsll.vx v14, v14, a2 +; RV32-NEXT: vor.vv v12, v12, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v14, v8 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsrl.vi v10, v8, 4 @@ -2070,32 +2070,32 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v16, v16, a4 ; RV32-NEXT: vsrl.vi v20, v8, 8 -; RV32-NEXT: lui a4, 5 -; RV32-NEXT: addi a4, a4, 1365 +; RV32-NEXT: lui a5, 5 +; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.x v0, a4 +; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 +; RV32-NEXT: vmv.v.i v24, 0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v24, v24, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v20, v20, v16 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v24, v24, a4 -; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: vor.vv v12, v20, v12 -; RV32-NEXT: vsll.vx v20, v8, a1 -; RV32-NEXT: vand.vx v24, v8, a3 -; RV32-NEXT: vsll.vx v24, v24, a2 -; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: vand.vx v24, v8, a4 -; RV32-NEXT: vsll.vi v24, v24, 24 -; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v20, v20, v24 +; RV32-NEXT: vor.vv v16, v20, v16 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vand.vx v20, v8, a3 +; RV32-NEXT: vsll.vx v20, v20, a2 +; RV32-NEXT: vor.vv v16, v16, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vor.vv v8, v20, v8 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: lui a1, 61681 @@ -2432,53 +2432,53 @@ ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v24, v16, 24 -; RV32-NEXT: li a5, 32 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a6, 349525 -; RV32-NEXT: addi a6, a6, 1365 +; RV32-NEXT: lui a5, 349525 +; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a7, 1044480 -; RV32-NEXT: vmv.v.x v0, a6 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a7, v0 +; RV32-NEXT: lui a6, 1044480 +; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v8, v16 -; RV32-NEXT: vsll.vi v0, v0, 8 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: lui a6, 4080 +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: vand.vx v0, v0, a6 ; RV32-NEXT: vor.vv v24, v24, v0 ; RV32-NEXT: addi a7, sp, 16 ; RV32-NEXT: vl8r.v v0, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vor.vv v24, v24, v0 ; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v0, v8, a3 -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vand.vx v0, v8, a3 +; RV32-NEXT: vsll.vx v0, v0, a2 +; RV32-NEXT: vsll.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vx v8, v8, a6 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 @@ -2488,7 +2488,7 @@ ; RV32-NEXT: vsrl.vi v16, v8, 2 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 @@ -2496,8 +2496,8 @@ ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a6 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a5 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 @@ -2812,53 +2812,53 @@ ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v24, v16, 24 -; RV32-NEXT: li a5, 32 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a6, 349525 -; RV32-NEXT: addi a6, a6, 1365 +; RV32-NEXT: lui a5, 349525 +; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a7, 1044480 -; RV32-NEXT: vmv.v.x v0, a6 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a7, v0 +; RV32-NEXT: lui a6, 1044480 +; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v8, v16 -; RV32-NEXT: vsll.vi v0, v0, 8 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: lui a6, 4080 +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: vand.vx v0, v0, a6 ; RV32-NEXT: vor.vv v24, v24, v0 ; RV32-NEXT: addi a7, sp, 16 ; RV32-NEXT: vl8r.v v0, (a7) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: vor.vv v24, v24, v0 ; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v0, v8, a3 -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vand.vx v0, v8, a3 +; RV32-NEXT: vsll.vx v0, v0, a2 +; RV32-NEXT: vsll.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vx v8, v8, a6 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 @@ -2868,7 +2868,7 @@ ; RV32-NEXT: vsrl.vi v16, v8, 2 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 @@ -2876,8 +2876,8 @@ ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a6 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a5 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vand.vv v8, v8, v24 Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -503,37 +503,37 @@ ; RV32: # %bb.0: ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vx v9, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v10, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v10, v10, a3 -; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vsll.vx v9, v8, a1 +; RV32-NEXT: lui a2, 16 +; RV32-NEXT: addi a2, a2, -256 +; RV32-NEXT: vand.vx v10, v8, a2 +; RV32-NEXT: li a3, 40 +; RV32-NEXT: vsll.vx v10, v10, a3 +; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v10, v8, a4 +; RV32-NEXT: vsll.vi v10, v10, 24 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: vmv.v.i v11, 0 ; RV32-NEXT: vmv.v.i v0, 5 -; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v10, v10, a4, v0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v11, v11, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsrl.vi v11, v8, 8 -; RV32-NEXT: vand.vv v11, v11, v10 -; RV32-NEXT: vsrl.vi v12, v8, 24 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vor.vv v11, v11, v12 -; RV32-NEXT: vor.vv v9, v11, v9 -; RV32-NEXT: vsll.vx v11, v8, a1 -; RV32-NEXT: vand.vx v12, v8, a3 -; RV32-NEXT: vsll.vx v12, v12, a2 -; RV32-NEXT: vor.vv v11, v11, v12 -; RV32-NEXT: vand.vv v10, v8, v10 -; RV32-NEXT: vsll.vi v10, v10, 8 -; RV32-NEXT: vand.vx v8, v8, a0 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vand.vv v12, v8, v11 +; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vor.vv v10, v10, v12 +; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: vsrl.vx v10, v8, a1 +; RV32-NEXT: vsrl.vx v12, v8, a3 +; RV32-NEXT: vand.vx v12, v12, a2 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vsrl.vi v12, v8, 8 +; RV32-NEXT: vand.vv v11, v12, v11 +; RV32-NEXT: vsrl.vi v8, v8, 24 +; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vor.vv v8, v11, v8 -; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_v2i64_unmasked: @@ -666,30 +666,30 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v12, v12, a3 ; RV32-NEXT: vor.vv v10, v12, v10 -; RV32-NEXT: vsrl.vi v12, v8, 8 -; RV32-NEXT: li a4, 85 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v12, v12, a4 +; RV32-NEXT: vsrl.vi v14, v8, 8 +; RV32-NEXT: li a5, 85 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV32-NEXT: vmv.v.x v0, a4 +; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32-NEXT: vmv.v.i v14, 0 -; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v14, v14, a4, v0 +; RV32-NEXT: vmv.v.i v16, 0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v16, v16, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vand.vv v12, v12, v14 -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vv v14, v14, v16 +; RV32-NEXT: vor.vv v12, v14, v12 ; RV32-NEXT: vor.vv v10, v12, v10 ; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v16, v8, a3 -; RV32-NEXT: vsll.vx v16, v16, a2 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vand.vx v16, v8, a0 -; RV32-NEXT: vsll.vi v16, v16, 24 -; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vx v14, v8, a3 +; RV32-NEXT: vsll.vx v14, v14, a2 +; RV32-NEXT: vor.vv v12, v12, v14 +; RV32-NEXT: vand.vx v14, v8, a4 +; RV32-NEXT: vsll.vi v14, v14, 24 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v14, v8 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: ret @@ -825,32 +825,32 @@ ; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vand.vx v16, v16, a3 ; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v16, v16, a4 ; RV32-NEXT: vsrl.vi v20, v8, 8 -; RV32-NEXT: lui a4, 5 -; RV32-NEXT: addi a4, a4, 1365 +; RV32-NEXT: lui a5, 5 +; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV32-NEXT: vmv.v.x v0, a4 +; RV32-NEXT: vmv.v.x v0, a5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a4, 1044480 -; RV32-NEXT: vmerge.vxm v16, v16, a4, v0 +; RV32-NEXT: vmv.v.i v24, 0 +; RV32-NEXT: lui a5, 1044480 +; RV32-NEXT: vmerge.vxm v24, v24, a5, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vand.vv v20, v20, v16 -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: lui a0, 4080 -; RV32-NEXT: vand.vx v24, v24, a0 -; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: vor.vv v12, v20, v12 -; RV32-NEXT: vsll.vx v20, v8, a1 -; RV32-NEXT: vand.vx v24, v8, a3 -; RV32-NEXT: vsll.vx v24, v24, a2 -; RV32-NEXT: vor.vv v20, v20, v24 -; RV32-NEXT: vand.vx v24, v8, a0 -; RV32-NEXT: vsll.vi v24, v24, 24 -; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v20, v20, v24 +; RV32-NEXT: vor.vv v16, v20, v16 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vsll.vx v16, v8, a1 +; RV32-NEXT: vand.vx v20, v8, a3 +; RV32-NEXT: vsll.vx v20, v20, a2 +; RV32-NEXT: vor.vv v16, v16, v20 +; RV32-NEXT: vand.vx v20, v8, a4 +; RV32-NEXT: vsll.vi v20, v20, 24 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsll.vi v8, v8, 8 -; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vor.vv v8, v20, v8 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: ret ; @@ -1075,48 +1075,49 @@ ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v24, v16, 24 -; RV32-NEXT: li a5, 32 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a6, 349525 -; RV32-NEXT: addi a6, a6, 1365 +; RV32-NEXT: lui a5, 349525 +; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a7, 1044480 -; RV32-NEXT: vmv.v.x v0, a6 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a7, v0 +; RV32-NEXT: lui a6, 1044480 +; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v8, v16 -; RV32-NEXT: vsll.vi v0, v0, 8 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: vand.vx v0, v0, a0 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v0, v8, a3 +; RV32-NEXT: vsll.vx v0, v0, a2 +; RV32-NEXT: vsll.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vx v8, v8, a0 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v0, v8, a3 -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 @@ -1344,48 +1345,49 @@ ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: vand.vx v24, v8, a2 -; RV32-NEXT: li a3, 40 -; RV32-NEXT: vsll.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v16, v8, a4 -; RV32-NEXT: vsll.vi v24, v16, 24 -; RV32-NEXT: li a5, 32 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: li a4, 32 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 -; RV32-NEXT: lui a6, 349525 -; RV32-NEXT: addi a6, a6, 1365 +; RV32-NEXT: lui a5, 349525 +; RV32-NEXT: addi a5, a5, 1365 ; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-NEXT: lui a7, 1044480 -; RV32-NEXT: vmv.v.x v0, a6 -; RV32-NEXT: vsetvli zero, a5, e32, m8, ta, ma -; RV32-NEXT: vmerge.vxm v16, v16, a7, v0 +; RV32-NEXT: lui a6, 1044480 +; RV32-NEXT: vmv.v.x v0, a5 +; RV32-NEXT: vsetvli zero, a4, e32, m8, ta, ma +; RV32-NEXT: vmerge.vxm v16, v16, a6, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v8, v16 -; RV32-NEXT: vsll.vi v0, v0, 8 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: lui a0, 4080 +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: vand.vx v0, v0, a0 ; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vl8r.v v0, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v0, v8, a3 +; RV32-NEXT: vsll.vx v0, v0, a2 +; RV32-NEXT: vsll.vx v24, v8, a1 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vx v8, v8, a0 +; RV32-NEXT: vsll.vi v8, v8, 24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v0, v8, a3 -; RV32-NEXT: vand.vx v0, v0, a2 -; RV32-NEXT: vsrl.vx v24, v8, a1 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v8, v8, 24 -; RV32-NEXT: vand.vx v8, v8, a4 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add sp, sp, a0 Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1618,14 +1618,15 @@ define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v15i64_unmasked: ; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 @@ -1775,14 +1776,15 @@ define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v16i64_unmasked: ; RV32: # %bb.0: +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a1 +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 1 -; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmf.ll @@ -9,7 +9,8 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v9 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v8 - ; CHECK-NEXT: [[PseudoVFADD_VV_M1_:%[0-9]+]]:vr = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFADD_VV_M1 [[COPY1]], [[COPY]], 2, 6 /* e64 */, implicit $frm + ; CHECK-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVFADD_VV_M1_:%[0-9]+]]:vr = nnan ninf nsz arcp contract afn reassoc nofpexcept PseudoVFADD_VV_M1 [[DEF]], [[COPY1]], [[COPY]], 2, 6 /* e64 */, 1 /* ta, mu */, implicit $frm ; CHECK-NEXT: $v8 = COPY [[PseudoVFADD_VV_M1_]] ; CHECK-NEXT: PseudoRET implicit $v8 %1 = fadd fast <2 x double> %x, %y Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -255,58 +255,48 @@ ; RV32-V128-NEXT: addi sp, sp, -16 ; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: li a1, 24 -; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 3 -; RV32-V128-NEXT: add a0, sp, a0 -; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-V128-NEXT: lui a0, %hi(.LCPI10_0) +; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) +; RV32-V128-NEXT: li a1, 32 +; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-V128-NEXT: vle32.v v0, (a0) +; RV32-V128-NEXT: vmv8r.v v24, v8 ; RV32-V128-NEXT: addi a0, sp, 16 ; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-V128-NEXT: li a0, 32 -; RV32-V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-V128-NEXT: lui a1, %hi(.LCPI10_1) -; RV32-V128-NEXT: addi a1, a1, %lo(.LCPI10_1) -; RV32-V128-NEXT: vle32.v v0, (a1) -; RV32-V128-NEXT: lui a1, %hi(.LCPI10_0) -; RV32-V128-NEXT: addi a1, a1, %lo(.LCPI10_0) -; RV32-V128-NEXT: vle32.v v16, (a1) -; RV32-V128-NEXT: csrr a1, vlenb -; RV32-V128-NEXT: slli a1, a1, 4 -; RV32-V128-NEXT: add a1, sp, a1 -; RV32-V128-NEXT: addi a1, a1, 16 -; RV32-V128-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-V128-NEXT: vrgather.vv v16, v8, v0 -; RV32-V128-NEXT: lui a1, 699051 -; RV32-V128-NEXT: addi a1, a1, -1366 -; RV32-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-V128-NEXT: vmv.v.x v0, a1 -; RV32-V128-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; RV32-V128-NEXT: vrgather.vv v8, v24, v0 +; RV32-V128-NEXT: lui a0, %hi(.LCPI10_1) +; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_1) +; RV32-V128-NEXT: vle32.v v24, (a0) ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: lui a0, 699051 +; RV32-V128-NEXT: addi a0, a0, -1366 +; RV32-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-V128-NEXT: vmv.v.x v0, a0 +; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vrgather.vv v16, v8, v24, v0.t +; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV32-V128-NEXT: vmv.v.v v24, v8 ; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-V128-NEXT: vmv4r.v v24, v8 ; RV32-V128-NEXT: addi a0, sp, 16 ; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vwaddu.vv v0, v8, v24 +; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 ; RV32-V128-NEXT: li a0, -1 -; RV32-V128-NEXT: vwmaccu.vx v0, a0, v24 +; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 ; RV32-V128-NEXT: vmv8r.v v8, v0 +; RV32-V128-NEXT: vmv8r.v v16, v24 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: li a1, 24 -; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add sp, sp, a0 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret @@ -316,58 +306,48 @@ ; RV64-V128-NEXT: addi sp, sp, -16 ; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: li a1, 24 -; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 3 -; RV64-V128-NEXT: add a0, sp, a0 -; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-V128-NEXT: lui a0, %hi(.LCPI10_0) +; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) +; RV64-V128-NEXT: li a1, 32 +; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-V128-NEXT: vle32.v v0, (a0) +; RV64-V128-NEXT: vmv8r.v v24, v8 ; RV64-V128-NEXT: addi a0, sp, 16 ; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV64-V128-NEXT: li a0, 32 -; RV64-V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-V128-NEXT: lui a1, %hi(.LCPI10_1) -; RV64-V128-NEXT: addi a1, a1, %lo(.LCPI10_1) -; RV64-V128-NEXT: vle32.v v0, (a1) -; RV64-V128-NEXT: lui a1, %hi(.LCPI10_0) -; RV64-V128-NEXT: addi a1, a1, %lo(.LCPI10_0) -; RV64-V128-NEXT: vle32.v v16, (a1) -; RV64-V128-NEXT: csrr a1, vlenb -; RV64-V128-NEXT: slli a1, a1, 4 -; RV64-V128-NEXT: add a1, sp, a1 -; RV64-V128-NEXT: addi a1, a1, 16 -; RV64-V128-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-V128-NEXT: vrgather.vv v16, v8, v0 -; RV64-V128-NEXT: lui a1, 699051 -; RV64-V128-NEXT: addiw a1, a1, -1366 -; RV64-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-V128-NEXT: vmv.v.x v0, a1 -; RV64-V128-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; RV64-V128-NEXT: vrgather.vv v8, v24, v0 +; RV64-V128-NEXT: lui a0, %hi(.LCPI10_1) +; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_1) +; RV64-V128-NEXT: vle32.v v24, (a0) ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: lui a0, 699051 +; RV64-V128-NEXT: addiw a0, a0, -1366 +; RV64-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV64-V128-NEXT: vmv.v.x v0, a0 +; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vrgather.vv v16, v8, v24, v0.t +; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV64-V128-NEXT: vmv.v.v v24, v8 ; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-V128-NEXT: vmv4r.v v24, v8 ; RV64-V128-NEXT: addi a0, sp, 16 ; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vwaddu.vv v0, v8, v24 +; RV64-V128-NEXT: vwaddu.vv v0, v8, v16 ; RV64-V128-NEXT: li a0, -1 -; RV64-V128-NEXT: vwmaccu.vx v0, a0, v24 +; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 ; RV64-V128-NEXT: vmv8r.v v8, v0 +; RV64-V128-NEXT: vmv8r.v v16, v24 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: li a1, 24 -; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add sp, sp, a0 ; RV64-V128-NEXT: addi sp, sp, 16 ; RV64-V128-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -138,11 +138,11 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { ; RV32-LABEL: vrgather_shuffle_xv_v4f64: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; RV32-NEXT: vid.v v12 ; RV32-NEXT: lui a0, %hi(.LCPI7_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI7_0) -; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vid.v v12 ; RV32-NEXT: vrsub.vi v12, v12, 4 ; RV32-NEXT: vmv.v.i v0, 12 ; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu @@ -152,12 +152,12 @@ ; ; RV64-LABEL: vrgather_shuffle_xv_v4f64: ; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vid.v v10 +; RV64-NEXT: vrsub.vi v12, v10, 4 ; RV64-NEXT: lui a0, %hi(.LCPI7_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI7_0) -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vlse64.v v10, (a0), zero -; RV64-NEXT: vid.v v12 -; RV64-NEXT: vrsub.vi v12, v12, 4 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.i v0, 12 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -414,58 +414,48 @@ ; RV32-V128-NEXT: addi sp, sp, -16 ; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: li a1, 24 -; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 3 -; RV32-V128-NEXT: add a0, sp, a0 -; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-V128-NEXT: lui a0, %hi(.LCPI17_0) +; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) +; RV32-V128-NEXT: li a1, 32 +; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-V128-NEXT: vle32.v v0, (a0) +; RV32-V128-NEXT: vmv8r.v v24, v8 ; RV32-V128-NEXT: addi a0, sp, 16 ; RV32-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV32-V128-NEXT: li a0, 32 -; RV32-V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-V128-NEXT: lui a1, %hi(.LCPI17_1) -; RV32-V128-NEXT: addi a1, a1, %lo(.LCPI17_1) -; RV32-V128-NEXT: vle32.v v0, (a1) -; RV32-V128-NEXT: lui a1, %hi(.LCPI17_0) -; RV32-V128-NEXT: addi a1, a1, %lo(.LCPI17_0) -; RV32-V128-NEXT: vle32.v v16, (a1) -; RV32-V128-NEXT: csrr a1, vlenb -; RV32-V128-NEXT: slli a1, a1, 4 -; RV32-V128-NEXT: add a1, sp, a1 -; RV32-V128-NEXT: addi a1, a1, 16 -; RV32-V128-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-V128-NEXT: vrgather.vv v16, v8, v0 -; RV32-V128-NEXT: lui a1, 699051 -; RV32-V128-NEXT: addi a1, a1, -1366 -; RV32-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV32-V128-NEXT: vmv.v.x v0, a1 -; RV32-V128-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; RV32-V128-NEXT: vrgather.vv v8, v24, v0 +; RV32-V128-NEXT: lui a0, %hi(.LCPI17_1) +; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) +; RV32-V128-NEXT: vle32.v v24, (a0) ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: lui a0, 699051 +; RV32-V128-NEXT: addi a0, a0, -1366 +; RV32-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV32-V128-NEXT: vmv.v.x v0, a0 +; RV32-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 3 ; RV32-V128-NEXT: add a0, sp, a0 ; RV32-V128-NEXT: addi a0, a0, 16 -; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vrgather.vv v16, v8, v24, v0.t +; RV32-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV32-V128-NEXT: vmv.v.v v24, v8 ; RV32-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-V128-NEXT: vmv4r.v v24, v8 ; RV32-V128-NEXT: addi a0, sp, 16 ; RV32-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-V128-NEXT: vwaddu.vv v0, v8, v24 +; RV32-V128-NEXT: vwaddu.vv v0, v8, v16 ; RV32-V128-NEXT: li a0, -1 -; RV32-V128-NEXT: vwmaccu.vx v0, a0, v24 +; RV32-V128-NEXT: vwmaccu.vx v0, a0, v16 ; RV32-V128-NEXT: vmv8r.v v8, v0 +; RV32-V128-NEXT: vmv8r.v v16, v24 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: li a1, 24 -; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: slli a0, a0, 4 ; RV32-V128-NEXT: add sp, sp, a0 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret @@ -475,58 +465,48 @@ ; RV64-V128-NEXT: addi sp, sp, -16 ; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: li a1, 24 -; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 3 -; RV64-V128-NEXT: add a0, sp, a0 -; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-V128-NEXT: lui a0, %hi(.LCPI17_0) +; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) +; RV64-V128-NEXT: li a1, 32 +; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV64-V128-NEXT: vle32.v v0, (a0) +; RV64-V128-NEXT: vmv8r.v v24, v8 ; RV64-V128-NEXT: addi a0, sp, 16 ; RV64-V128-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; RV64-V128-NEXT: li a0, 32 -; RV64-V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-V128-NEXT: lui a1, %hi(.LCPI17_1) -; RV64-V128-NEXT: addi a1, a1, %lo(.LCPI17_1) -; RV64-V128-NEXT: vle32.v v0, (a1) -; RV64-V128-NEXT: lui a1, %hi(.LCPI17_0) -; RV64-V128-NEXT: addi a1, a1, %lo(.LCPI17_0) -; RV64-V128-NEXT: vle32.v v16, (a1) -; RV64-V128-NEXT: csrr a1, vlenb -; RV64-V128-NEXT: slli a1, a1, 4 -; RV64-V128-NEXT: add a1, sp, a1 -; RV64-V128-NEXT: addi a1, a1, 16 -; RV64-V128-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-V128-NEXT: vrgather.vv v16, v8, v0 -; RV64-V128-NEXT: lui a1, 699051 -; RV64-V128-NEXT: addiw a1, a1, -1366 -; RV64-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma -; RV64-V128-NEXT: vmv.v.x v0, a1 -; RV64-V128-NEXT: vsetvli zero, a0, e32, m8, ta, mu +; RV64-V128-NEXT: vrgather.vv v8, v24, v0 +; RV64-V128-NEXT: lui a0, %hi(.LCPI17_1) +; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_1) +; RV64-V128-NEXT: vle32.v v24, (a0) ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: lui a0, 699051 +; RV64-V128-NEXT: addiw a0, a0, -1366 +; RV64-V128-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV64-V128-NEXT: vmv.v.x v0, a0 +; RV64-V128-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 3 ; RV64-V128-NEXT: add a0, sp, a0 ; RV64-V128-NEXT: addi a0, a0, 16 -; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vrgather.vv v16, v8, v24, v0.t +; RV64-V128-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: vrgather.vv v8, v16, v24, v0.t +; RV64-V128-NEXT: vmv.v.v v24, v8 ; RV64-V128-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-V128-NEXT: vmv4r.v v24, v8 ; RV64-V128-NEXT: addi a0, sp, 16 ; RV64-V128-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV64-V128-NEXT: vwaddu.vv v0, v8, v24 +; RV64-V128-NEXT: vwaddu.vv v0, v8, v16 ; RV64-V128-NEXT: li a0, -1 -; RV64-V128-NEXT: vwmaccu.vx v0, a0, v24 +; RV64-V128-NEXT: vwmaccu.vx v0, a0, v16 ; RV64-V128-NEXT: vmv8r.v v8, v0 +; RV64-V128-NEXT: vmv8r.v v16, v24 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: li a1, 24 -; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: slli a0, a0, 4 ; RV64-V128-NEXT: add sp, sp, a0 ; RV64-V128-NEXT: addi sp, sp, 16 ; RV64-V128-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -99,10 +99,10 @@ ; CHECK-LABEL: vrgather_shuffle_xv_v4i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu -; CHECK-NEXT: vmv.v.i v9, 5 -; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vrsub.vi v10, v9, 4 ; CHECK-NEXT: vmv.v.i v0, 12 -; CHECK-NEXT: vrsub.vi v10, v10, 4 +; CHECK-NEXT: vmv.v.i v9, 5 ; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: ret @@ -177,40 +177,39 @@ define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { ; RV32-LABEL: vrgather_shuffle_vv_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vmv.v.i v16, 5 ; RV32-NEXT: lui a0, %hi(.LCPI11_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle16.v v20, (a0) -; RV32-NEXT: vmv.v.i v21, 2 -; RV32-NEXT: vslideup.vi v21, v16, 7 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; RV32-NEXT: vrgatherei16.vv v16, v8, v20 +; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32-NEXT: vmv.v.i v8, 5 +; RV32-NEXT: vmv.v.i v9, 2 +; RV32-NEXT: vslideup.vi v9, v8, 7 ; RV32-NEXT: li a0, 164 -; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v16, v12, v21, v0.t +; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu +; RV32-NEXT: vrgatherei16.vv v16, v12, v9, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_shuffle_vv_v8i64: ; RV64: # %bb.0: -; RV64-NEXT: li a0, 5 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vmv.s.x v16, a0 -; RV64-NEXT: vmv.v.i v20, 2 ; RV64-NEXT: lui a0, %hi(.LCPI11_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI11_0) -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vslideup.vi v20, v16, 7 -; RV64-NEXT: vrgather.vv v16, v8, v24 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vle64.v v20, (a0) +; RV64-NEXT: vmv4r.v v16, v8 +; RV64-NEXT: vrgather.vv v8, v16, v20 +; RV64-NEXT: li a0, 5 +; RV64-NEXT: vmv.s.x v20, a0 +; RV64-NEXT: vmv.v.i v16, 2 +; RV64-NEXT: vslideup.vi v16, v20, 7 ; RV64-NEXT: li a0, 164 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vv v16, v12, v20, v0.t -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vrgather.vv v8, v12, v16, v0.t ; RV64-NEXT: ret %s = shufflevector <8 x i64> %x, <8 x i64> %y, <8 x i32> ret <8 x i64> %s @@ -219,20 +218,20 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) { ; RV32-LABEL: vrgather_shuffle_xv_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: lui a0, %hi(.LCPI12_1) -; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) -; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: lui a0, %hi(.LCPI12_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_0) -; RV32-NEXT: vle16.v v17, (a0) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vmv.v.i v20, -1 ; RV32-NEXT: vrgatherei16.vv v12, v20, v16 +; RV32-NEXT: lui a0, %hi(.LCPI12_1) +; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) +; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: li a0, 113 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t +; RV32-NEXT: vrgatherei16.vv v12, v8, v16, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; @@ -257,20 +256,20 @@ define <8 x i64> @vrgather_shuffle_vx_v8i64(<8 x i64> %x) { ; RV32-LABEL: vrgather_shuffle_vx_v8i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: lui a0, %hi(.LCPI13_1) -; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1) -; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: lui a0, %hi(.LCPI13_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI13_0) -; RV32-NEXT: vle16.v v17, (a0) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vle16.v v16, (a0) ; RV32-NEXT: vrgatherei16.vv v12, v8, v16 +; RV32-NEXT: lui a0, %hi(.LCPI13_1) +; RV32-NEXT: addi a0, a0, %lo(.LCPI13_1) +; RV32-NEXT: vle16.v v8, (a0) ; RV32-NEXT: li a0, 140 ; RV32-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV32-NEXT: vmv.v.i v8, 5 -; RV32-NEXT: vrgatherei16.vv v12, v8, v17, v0.t +; RV32-NEXT: vmv.v.i v16, 5 +; RV32-NEXT: vrgatherei16.vv v12, v16, v8, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret ; @@ -388,10 +387,9 @@ define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: li a0, 66 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -422,11 +420,10 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0_ins_i0we4: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 67 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 2 +; CHECK-NEXT: li a0, 67 +; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 4 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu @@ -473,14 +470,14 @@ ; CHECK-LABEL: splat_ve2_we0_ins_i2we4: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 4 +; CHECK-NEXT: vrgather.vi v10, v8, 2 +; CHECK-NEXT: vmv.v.i v8, 4 ; CHECK-NEXT: vmv.v.i v11, 0 ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v11, v10, 2 +; CHECK-NEXT: vslideup.vi v11, v8, 2 ; CHECK-NEXT: li a0, 70 ; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -491,38 +488,38 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) { ; RV32-LABEL: splat_ve2_we0_ins_i2ve4_i5we6: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vmv.v.i v10, 6 -; RV32-NEXT: vmv.v.i v11, 0 -; RV32-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; RV32-NEXT: vslideup.vi v11, v10, 5 ; RV32-NEXT: lui a0, 8256 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV32-NEXT: vmv.v.x v12, a0 -; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV32-NEXT: vmv.v.x v11, a0 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV32-NEXT: vrgather.vv v10, v8, v11 +; RV32-NEXT: vmv.v.i v8, 6 +; RV32-NEXT: vmv.v.i v11, 0 +; RV32-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; RV32-NEXT: vslideup.vi v11, v8, 5 ; RV32-NEXT: li a0, 98 ; RV32-NEXT: vmv.v.x v0, a0 -; RV32-NEXT: vrgather.vv v10, v8, v12 +; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV32-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV32-NEXT: vmv1r.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: splat_ve2_we0_ins_i2ve4_i5we6: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vmv.v.i v10, 6 -; RV64-NEXT: vmv.v.i v11, 0 -; RV64-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; RV64-NEXT: vslideup.vi v11, v10, 5 ; RV64-NEXT: lui a0, 8256 ; RV64-NEXT: addiw a0, a0, 2 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; RV64-NEXT: vmv.v.x v12, a0 -; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu +; RV64-NEXT: vmv.v.x v11, a0 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma +; RV64-NEXT: vrgather.vv v10, v8, v11 +; RV64-NEXT: vmv.v.i v8, 6 +; RV64-NEXT: vmv.v.i v11, 0 +; RV64-NEXT: vsetivli zero, 6, e8, mf2, tu, ma +; RV64-NEXT: vslideup.vi v11, v8, 5 ; RV64-NEXT: li a0, 98 ; RV64-NEXT: vmv.v.x v0, a0 -; RV64-NEXT: vrgather.vv v10, v8, v12 +; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-NEXT: vmv1r.v v8, v10 ; RV64-NEXT: ret @@ -765,16 +762,16 @@ define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: unmergable: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI46_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vle8.v v11, (a0) ; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vadd.vi v12, v10, 2 +; CHECK-NEXT: vadd.vi v11, v10, 2 +; CHECK-NEXT: lui a0, %hi(.LCPI46_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) +; CHECK-NEXT: vle8.v v12, (a0) ; CHECK-NEXT: li a0, 234 ; CHECK-NEXT: vmv.v.x v0, a0 -; CHECK-NEXT: vrgather.vv v10, v8, v12 -; CHECK-NEXT: vrgather.vv v10, v9, v11, v0.t +; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vrgather.vv v10, v9, v12, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1249,11 +1249,12 @@ ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vdivu.vv v9, v8, v9 -; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vid.v v10 ; CHECK-NEXT: vadd.vi v10, v10, 12 +; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vdivu.vv v8, v8, v10 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vi v9, v8, 4 @@ -5723,7 +5724,7 @@ ; LMULMAX1-RV64-NEXT: vsra.vv v11, v11, v12 ; LMULMAX1-RV64-NEXT: vadd.vv v9, v11, v9 ; LMULMAX1-RV64-NEXT: vmulh.vv v10, v8, v10 -; LMULMAX1-RV64-NEXT: vmacc.vv v10, v13, v8 +; LMULMAX1-RV64-NEXT: vmacc.vv v10, v8, v13 ; LMULMAX1-RV64-NEXT: vsrl.vx v8, v10, a2 ; LMULMAX1-RV64-NEXT: vsra.vv v10, v10, v12 ; LMULMAX1-RV64-NEXT: vadd.vv v8, v10, v8 Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -656,15 +656,15 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 86 +; RV64-NEXT: li a3, 92 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd6, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 86 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xdc, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 92 * vlenb ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: addi a2, a1, 256 ; RV64-NEXT: vle64.v v16, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 +; RV64-NEXT: li a3, 68 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -672,116 +672,125 @@ ; RV64-NEXT: addi a2, a1, 128 ; RV64-NEXT: vle64.v v8, (a2) ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 77 +; RV64-NEXT: li a3, 84 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vle64.v v8, (a1) -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vle64.v v24, (a1) ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vrgather.vi v8, v16, 4 ; RV64-NEXT: li a1, 128 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.v.x v1, a1 -; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v16, 8 +; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 40 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma +; RV64-NEXT: vslidedown.vi v16, v16, 8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 52 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vrgather.vi v8, v24, 2, v0.t +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgather.vi v8, v16, 2, v0.t ; RV64-NEXT: vmv.v.v v4, v8 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: li a1, 6 -; RV64-NEXT: vid.v v16 -; RV64-NEXT: vmul.vx v24, v16, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vid.v v8 +; RV64-NEXT: vmul.vx v8, v8, a1 ; RV64-NEXT: li a1, 56 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 69 +; RV64-NEXT: li a3, 60 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vrgather.vv v16, v24, v8 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 76 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vadd.vi v8, v8, -16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 44 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v8, v24 -; RV64-NEXT: vadd.vi v24, v24, -16 +; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 37 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 84 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 44 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v8, v24, v0.t +; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: vmv.v.v v4, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 25 +; RV64-NEXT: li a2, 36 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 68 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v12, v8, 5 -; RV64-NEXT: vmv1r.v v0, v1 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v16, v0, 5 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 40 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 52 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v12, v24, 3, v0.t +; RV64-NEXT: vrgather.vi v16, v24, 3, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 44 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vadd.vi v0, v8, 1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 60 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 @@ -789,436 +798,421 @@ ; RV64-NEXT: vrgather.vv v16, v24, v0 ; RV64-NEXT: vadd.vi v24, v8, -15 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 37 +; RV64-NEXT: li a2, 24 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v8, v24, v0.t -; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 84 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 24 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vmv.v.i v8, 6 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 +; RV64-NEXT: li a2, 44 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v12, zero -; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma +; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v20, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 3 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 44 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vslideup.vi v8, v12, 5 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v12, v16, v8 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v8, v24, 2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 60 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v0, v8 +; RV64-NEXT: li a1, 24 +; RV64-NEXT: vadd.vi v8, v24, -14 +; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma +; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 24 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v12, v16, 4, v0.t +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 37 +; RV64-NEXT: li a2, 84 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vmv.v.i v12, 6 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v8, v0, 2 +; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv.s.x v8, zero +; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 12 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8 +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vslideup.vi v12, v8, 5 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 4 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 68 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a1, 24 -; RV64-NEXT: vadd.vi v8, v0, -14 -; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma -; RV64-NEXT: vmv.v.x v0, a1 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v20, v0, v12 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 40 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 4 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 52 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v24, v16, v8, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v20, v8, 4, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma +; RV64-NEXT: vmv.v.v v20, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 37 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v8, v24 +; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 37 +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: li a1, 1 -; RV64-NEXT: vmv.v.i v8, 7 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 4 -; RV64-NEXT: add a2, a3, a2 -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vs4r.v v8, (a2) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v12, a1 -; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v0, v8, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 13 +; RV64-NEXT: li a2, 60 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vslideup.vi v8, v12, 5 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v16, v24, v0 +; RV64-NEXT: vmv.v.v v24, v16 +; RV64-NEXT: vadd.vi v16, v8, -13 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 24 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v12, v16, v8 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 84 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v12, v16, 5, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v24, v8, v16, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 24 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: li a1, 1 +; RV64-NEXT: vmv.v.i v20, 7 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 20 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs4r.v v20, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vmv.s.x v8, a1 +; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v24, v0, 3 +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vslideup.vi v20, v8, 5 +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 68 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v24 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v24, v20 ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 40 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vadd.vi v16, v0, -13 -; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 52 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV64-NEXT: vrgather.vi v8, v24, 5, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 24 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v12, v8 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v8, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 5 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 24 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 3 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v16, v8, 6 +; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v20, v8, 6 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: li a1, 192 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 53 +; RV64-NEXT: li a3, 68 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v20, v8, 2 +; RV64-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v8, v24, 2 ; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 3 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 52 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v20, v8, v16, v0.t +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v24, v20, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 +; RV64-NEXT: li a2, 40 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v24, v0, 4 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v0, v24, 4 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 60 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v16, v24 +; RV64-NEXT: vrgather.vv v8, v16, v0 ; RV64-NEXT: li a1, 28 -; RV64-NEXT: vadd.vi v16, v0, -12 -; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: add a2, sp, a2 -; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vadd.vi v16, v24, -12 +; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a1 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 12 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs1r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 84 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v24, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 +; RV64-NEXT: li a2, 40 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmv.v.v v12, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 29 +; RV64-NEXT: li a2, 40 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 7, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 13 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 4 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 20 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vslideup.vi v24, v8, 6 +; RV64-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vslideup.vi v4, v8, 6 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 68 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vi v16, v8, 3 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vi v8, v16, 3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 3 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 45 +; RV64-NEXT: li a2, 52 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v8, v24, v0.t +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v4, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 4 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 20 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v24, v8, 5 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vadd.vi v0, v16, 5 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 60 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v8, v0, v24 +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v16, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 68 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 61 +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vadd.vi v8, v24, -11 +; RV64-NEXT: vadd.vi v24, v24, -11 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 76 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 12 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 77 +; RV64-NEXT: li a2, 84 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 69 +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 53 +; RV64-NEXT: li a2, 68 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgather.vv v16, v24, v8, v0.t +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a2, a1, 4 -; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: li a2, 20 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v12, v16 +; RV64-NEXT: vmv.v.v v12, v8 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 25 +; RV64-NEXT: li a2, 36 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vse64.v v16, (a0) ; RV64-NEXT: addi a1, a0, 320 ; RV64-NEXT: vse64.v v12, (a1) ; RV64-NEXT: addi a1, a0, 256 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 29 +; RV64-NEXT: li a3, 40 ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 @@ -1226,30 +1220,29 @@ ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 192 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a3, a2, 5 -; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: li a3, 24 +; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a3, 37 -; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: slli a2, a2, 5 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a0, a0, 64 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 41 +; RV64-NEXT: li a2, 44 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: li a1, 86 +; RV64-NEXT: li a1, 92 ; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -2301,28 +2301,27 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vsll.vi v24, v8, 3 -; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: sltu a3, a1, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v8, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: bltu a1, a3, .LBB93_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB93_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB93_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB93_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_v32i32_v32f64: @@ -2375,28 +2374,27 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vsll.vi v24, v8, 3 -; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: sltu a3, a1, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v8, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: bltu a1, a3, .LBB94_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB94_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB94_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB94_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64: @@ -2435,28 +2433,27 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64: ; RV32: # %bb.0: -; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vsll.vi v24, v8, 3 -; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: sltu a3, a1, a2 -; RV32-NEXT: addi a3, a3, -1 -; RV32-NEXT: and a2, a3, a2 +; RV32-NEXT: li a3, 16 +; RV32-NEXT: vsll.vi v16, v8, 3 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: bltu a1, a3, .LBB95_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a2, 16 +; RV32-NEXT: .LBB95_2: +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: addi a2, a1, -16 +; RV32-NEXT: sltu a1, a1, a2 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t -; RV32-NEXT: li a2, 16 -; RV32-NEXT: bltu a1, a2, .LBB95_2 -; RV32-NEXT: # %bb.1: -; RV32-NEXT: li a1, 16 -; RV32-NEXT: .LBB95_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmv1r.v v0, v1 -; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64: Index: llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir +++ llvm/test/CodeGen/RISCV/rvv/mask-reg-alloc.mir @@ -20,7 +20,7 @@ ; CHECK-NEXT: renamable $v8 = PseudoVMERGE_VIM_M1 killed renamable $v2, 1, killed renamable $v0, 1, 3 /* e8 */, implicit $vl, implicit $vtype ; CHECK-NEXT: renamable $v0 = COPY killed renamable $v1 ; CHECK-NEXT: renamable $v9 = PseudoVMERGE_VIM_M1 killed renamable $v3, 1, killed renamable $v0, 1, 3 /* e8 */, implicit $vl, implicit $vtype - ; CHECK-NEXT: renamable $v0 = PseudoVADD_VV_M1 killed renamable $v8, killed renamable $v9, 1, 3 /* e8 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: renamable $v0 = PseudoVADD_VV_M1 undef renamable $v0, killed renamable $v8, killed renamable $v9, 1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: PseudoRET implicit $v0 %0:vr = COPY $v0 %1:vr = COPY $v1 @@ -30,7 +30,8 @@ %5:vrnov0 = PseudoVMERGE_VIM_M1 killed %2, 1, %4, 1, 3 %6:vmv0 = COPY %1 %7:vrnov0 = PseudoVMERGE_VIM_M1 killed %3, 1, %6, 1, 3 - %8:vr = PseudoVADD_VV_M1 killed %5, killed %7, 1, 3 + %pt:vr = IMPLICIT_DEF + %8:vr = PseudoVADD_VV_M1 %pt, killed %5, killed %7, 1, 3, 0 $v0 = COPY %8 PseudoRET implicit $v0 ... Index: llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll +++ llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll @@ -209,88 +209,86 @@ define @reverse_nxv8i1( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv8i1: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 -; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v10 -; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vid.v v8 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 -; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1 +; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v10, 0 +; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v10, v10, 1, v0 +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v10, v8 +; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v11, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_nxv8i1: ; RV32-BITS-256: # %bb.0: -; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; RV32-BITS-256-NEXT: vmv.v.i v8, 0 -; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: addi a0, a0, -1 -; RV32-BITS-256-NEXT: vid.v v9 -; RV32-BITS-256-NEXT: vrsub.vx v9, v9, a0 -; RV32-BITS-256-NEXT: vrgather.vv v10, v8, v9 +; RV32-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; RV32-BITS-256-NEXT: vid.v v8 +; RV32-BITS-256-NEXT: vrsub.vx v8, v8, a0 +; RV32-BITS-256-NEXT: vmv.v.i v9, 0 +; RV32-BITS-256-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-BITS-256-NEXT: vrgather.vv v10, v9, v8 ; RV32-BITS-256-NEXT: vand.vi v8, v10, 1 ; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_nxv8i1: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; RV32-BITS-512-NEXT: vmv.v.i v8, 0 -; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: addi a0, a0, -1 -; RV32-BITS-512-NEXT: vid.v v9 -; RV32-BITS-512-NEXT: vrsub.vx v9, v9, a0 -; RV32-BITS-512-NEXT: vrgather.vv v10, v8, v9 +; RV32-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; RV32-BITS-512-NEXT: vid.v v8 +; RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0 +; RV32-BITS-512-NEXT: vmv.v.i v9, 0 +; RV32-BITS-512-NEXT: vmerge.vim v9, v9, 1, v0 +; RV32-BITS-512-NEXT: vrgather.vv v10, v9, v8 ; RV32-BITS-512-NEXT: vand.vi v8, v10, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i1: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 -; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v10 -; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vid.v v8 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v9, v8, v10 -; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v9, 1 +; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v10, 0 +; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v10, v10, 1, v0 +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v11, v10, v8 +; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v11, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_nxv8i1: ; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; RV64-BITS-256-NEXT: vmv.v.i v8, 0 -; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: addi a0, a0, -1 -; RV64-BITS-256-NEXT: vid.v v9 -; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0 -; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9 +; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; RV64-BITS-256-NEXT: vid.v v8 +; RV64-BITS-256-NEXT: vrsub.vx v8, v8, a0 +; RV64-BITS-256-NEXT: vmv.v.i v9, 0 +; RV64-BITS-256-NEXT: vmerge.vim v9, v9, 1, v0 +; RV64-BITS-256-NEXT: vrgather.vv v10, v9, v8 ; RV64-BITS-256-NEXT: vand.vi v8, v10, 1 ; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_nxv8i1: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; RV64-BITS-512-NEXT: vmv.v.i v8, 0 -; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: addi a0, a0, -1 -; RV64-BITS-512-NEXT: vid.v v9 -; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0 -; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9 +; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma +; RV64-BITS-512-NEXT: vid.v v8 +; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0 +; RV64-BITS-512-NEXT: vmv.v.i v9, 0 +; RV64-BITS-512-NEXT: vmerge.vim v9, v9, 1, v0 +; RV64-BITS-512-NEXT: vrgather.vv v10, v9, v8 ; RV64-BITS-512-NEXT: vand.vi v8, v10, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 ; RV64-BITS-512-NEXT: ret @@ -497,18 +495,18 @@ define @reverse_nxv64i1( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i1: ; RV32-BITS-UNKNOWN: # %bb.0: -; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 -; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vid.v v16 +; RV32-BITS-UNKNOWN-NEXT: vid.v v8 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, -1 -; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 -; RV32-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v8, v16 -; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v12, v16 +; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 +; RV32-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0 +; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8 +; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v20, v8 ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 @@ -531,17 +529,18 @@ ; ; RV32-BITS-512-LABEL: reverse_nxv64i1: ; RV32-BITS-512: # %bb.0: -; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-BITS-512-NEXT: vmv.v.i v8, 0 -; RV32-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV32-BITS-512-NEXT: vid.v v16 +; RV32-BITS-512-NEXT: vid.v v8 ; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: slli a0, a0, 2 ; RV32-BITS-512-NEXT: addi a0, a0, -1 -; RV32-BITS-512-NEXT: vrsub.vx v16, v16, a0 -; RV32-BITS-512-NEXT: vrgather.vv v28, v8, v16 -; RV32-BITS-512-NEXT: vrgather.vv v24, v12, v16 +; RV32-BITS-512-NEXT: vrsub.vx v8, v8, a0 +; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV32-BITS-512-NEXT: vmv.v.i v16, 0 +; RV32-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0 +; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; RV32-BITS-512-NEXT: vrgather.vv v28, v16, v8 +; RV32-BITS-512-NEXT: vrgather.vv v24, v20, v8 ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-512-NEXT: vand.vi v8, v24, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 @@ -549,18 +548,18 @@ ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i1: ; RV64-BITS-UNKNOWN: # %bb.0: -; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0 -; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vid.v v16 +; RV64-BITS-UNKNOWN-NEXT: vid.v v8 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 ; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1 -; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0 -; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e8, m4, ta, ma -; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v8, v16 -; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v12, v16 +; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v16, 0 +; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v16, v16, 1, v0 +; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v28, v16, v8 +; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v24, v20, v8 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 @@ -583,17 +582,18 @@ ; ; RV64-BITS-512-LABEL: reverse_nxv64i1: ; RV64-BITS-512: # %bb.0: -; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-512-NEXT: vmv.v.i v8, 0 -; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; RV64-BITS-512-NEXT: vid.v v16 +; RV64-BITS-512-NEXT: vid.v v8 ; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: slli a0, a0, 2 ; RV64-BITS-512-NEXT: addi a0, a0, -1 -; RV64-BITS-512-NEXT: vrsub.vx v16, v16, a0 -; RV64-BITS-512-NEXT: vrgather.vv v28, v8, v16 -; RV64-BITS-512-NEXT: vrgather.vv v24, v12, v16 +; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0 +; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV64-BITS-512-NEXT: vmv.v.i v16, 0 +; RV64-BITS-512-NEXT: vmerge.vim v16, v16, 1, v0 +; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma +; RV64-BITS-512-NEXT: vrgather.vv v28, v16, v8 +; RV64-BITS-512-NEXT: vrgather.vv v24, v20, v8 ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-512-NEXT: vand.vi v8, v24, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 Index: llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir +++ llvm/test/CodeGen/RISCV/rvv/reg-coalescing.mir @@ -16,7 +16,7 @@ ; CHECK-NEXT: %1.sub_vrm2_1:vrn2m2 = PseudoVLE32_V_M2 %pt2, $x10, 1, 5 /* e32 */, 0 /* tu, mu */ ; CHECK-NEXT: %pt3:vrm2 = IMPLICIT_DEF ; CHECK-NEXT: [[PseudoVLE32_V_M2_:%[0-9]+]]:vrm2 = PseudoVLE32_V_M2 %pt3, $x10, 1, 5 /* e32 */, 0 /* tu, mu */ - ; CHECK-NEXT: undef early-clobber %5.sub_vrm2_0:vrn2m2 = PseudoVRGATHER_VI_M2 %1.sub_vrm2_0, 0, 1, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: undef early-clobber %5.sub_vrm2_0:vrn2m2 = PseudoVRGATHER_VI_M2 undef %5.sub_vrm2_0, %1.sub_vrm2_0, 0, 1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: %5.sub_vrm2_1:vrn2m2 = COPY %1.sub_vrm2_1 ; CHECK-NEXT: PseudoVSUXSEG2EI32_V_M2_M2 %5, $x10, [[PseudoVLE32_V_M2_]], 1, 5 /* e32 */, implicit $vl, implicit $vtype %pt:vrm2 = IMPLICIT_DEF @@ -25,7 +25,7 @@ %0.sub_vrm2_1:vrn2m2 = PseudoVLE32_V_M2 %pt2, $x10, 1, 5, 0 %pt3:vrm2 = IMPLICIT_DEF %1:vrm2 = PseudoVLE32_V_M2 %pt3, $x10, 1, 5, 0 - undef early-clobber %2.sub_vrm2_0:vrn2m2 = PseudoVRGATHER_VI_M2 %0.sub_vrm2_0:vrn2m2, 0, 1, 5, implicit $vl, implicit $vtype + undef early-clobber %2.sub_vrm2_0:vrn2m2 = PseudoVRGATHER_VI_M2 undef %2.sub_vrm2_0, %0.sub_vrm2_0:vrn2m2, 0, 1, 5, 0, implicit $vl, implicit $vtype %2.sub_vrm2_1:vrn2m2 = COPY %0.sub_vrm2_1:vrn2m2 PseudoVSUXSEG2EI32_V_M2_M2 %2:vrn2m2, $x10, %1:vrm2, 1, 5, implicit $vl, implicit $vtype ... Index: llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll +++ llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll @@ -15,27 +15,31 @@ ; SPILL-O0-NEXT: slli a1, a1, 1 ; SPILL-O0-NEXT: sub sp, sp, a1 ; SPILL-O0-NEXT: sw a0, 8(sp) # 4-byte Folded Spill +; SPILL-O0-NEXT: vmv1r.v v10, v9 +; SPILL-O0-NEXT: vmv1r.v v9, v8 ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 ; SPILL-O0-NEXT: addi a1, a1, 16 -; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; SPILL-O0-NEXT: vfadd.vv v8, v8, v9 +; SPILL-O0-NEXT: vfadd.vv v8, v9, v10 ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: lui a0, %hi(.L.str) ; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) ; SPILL-O0-NEXT: call puts@plt ; SPILL-O0-NEXT: addi a1, sp, 16 -; SPILL-O0-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload +; SPILL-O0-NEXT: vl1r.v v10, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 ; SPILL-O0-NEXT: addi a1, a1, 16 -; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload +; SPILL-O0-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: # kill: def $x11 killed $x10 ; SPILL-O0-NEXT: lw a0, 8(sp) # 4-byte Folded Reload +; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; SPILL-O0-NEXT: vfadd.vv v8, v8, v9 +; SPILL-O0-NEXT: vfadd.vv v8, v9, v10 ; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 Index: llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll +++ llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll @@ -18,27 +18,31 @@ ; SPILL-O0-NEXT: slli a1, a1, 1 ; SPILL-O0-NEXT: sub sp, sp, a1 ; SPILL-O0-NEXT: sd a0, 16(sp) # 8-byte Folded Spill +; SPILL-O0-NEXT: vmv1r.v v10, v9 +; SPILL-O0-NEXT: vmv1r.v v9, v8 ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 ; SPILL-O0-NEXT: addi a1, a1, 32 -; SPILL-O0-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; SPILL-O0-NEXT: vfadd.vv v8, v8, v9 +; SPILL-O0-NEXT: vfadd.vv v8, v9, v10 ; SPILL-O0-NEXT: addi a0, sp, 32 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: lui a0, %hi(.L.str) ; SPILL-O0-NEXT: addi a0, a0, %lo(.L.str) ; SPILL-O0-NEXT: call puts@plt ; SPILL-O0-NEXT: addi a1, sp, 32 -; SPILL-O0-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload +; SPILL-O0-NEXT: vl1r.v v10, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a1, vlenb ; SPILL-O0-NEXT: add a1, sp, a1 ; SPILL-O0-NEXT: addi a1, a1, 32 -; SPILL-O0-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload +; SPILL-O0-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload ; SPILL-O0-NEXT: # kill: def $x11 killed $x10 ; SPILL-O0-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; SPILL-O0-NEXT: vfadd.vv v8, v8, v9 +; SPILL-O0-NEXT: vfadd.vv v8, v9, v10 ; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 ; SPILL-O0-NEXT: add sp, sp, a0 Index: llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -78,11 +78,11 @@ ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: vrsub.vi v12, v11, 15 ; CHECK-NEXT: vrgather.vv v10, v8, v12 +; CHECK-NEXT: vrsub.vi v8, v11, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; CHECK-NEXT: vrsub.vi v8, v11, 7 ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -224,11 +224,11 @@ ; CHECK-NEXT: vid.v v14 ; CHECK-NEXT: vrsub.vi v16, v14, 15 ; CHECK-NEXT: vrgather.vv v10, v8, v16 +; CHECK-NEXT: vrsub.vi v8, v14, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vrsub.vi v8, v14, 7 ; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -373,11 +373,11 @@ ; CHECK-NEXT: vid.v v20 ; CHECK-NEXT: vrsub.vi v24, v20, 15 ; CHECK-NEXT: vrgather.vv v12, v8, v24 +; CHECK-NEXT: vrsub.vi v8, v20, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vrsub.vi v8, v20, 7 ; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -604,11 +604,11 @@ ; CHECK-NEXT: vid.v v14 ; CHECK-NEXT: vrsub.vi v16, v14, 15 ; CHECK-NEXT: vrgather.vv v10, v8, v16 +; CHECK-NEXT: vrsub.vi v8, v14, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vrsub.vi v8, v14, 7 ; CHECK-NEXT: vrgather.vv v10, v12, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -724,11 +724,11 @@ ; CHECK-NEXT: vid.v v20 ; CHECK-NEXT: vrsub.vi v24, v20, 15 ; CHECK-NEXT: vrgather.vv v12, v8, v24 +; CHECK-NEXT: vrsub.vi v8, v20, 7 ; CHECK-NEXT: li a0, 255 ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; CHECK-NEXT: vmv.v.x v0, a0 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vrsub.vi v8, v20, 7 ; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret Index: llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll +++ llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll @@ -9,18 +9,18 @@ define <2 x i64> @vec_v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { ; CHECK-LABEL: vec_v2i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: srli a1, a0, 1 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vmv.v.x v10, a1 -; CHECK-NEXT: vsll.vv v11, v8, v9 -; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vsll.vv v10, v8, v9 +; CHECK-NEXT: vsra.vv v9, v10, v9 ; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: srli a1, a0, 1 +; CHECK-NEXT: vmv.v.x v11, a1 ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: slli a0, a0, 63 -; CHECK-NEXT: vmerge.vxm v8, v10, a0, v0 +; CHECK-NEXT: vmerge.vxm v8, v11, a0, v0 ; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %tmp = call <2 x i64> @llvm.sshl.sat.v2i64(<2 x i64> %x, <2 x i64> %y) ret <2 x i64> %tmp @@ -29,19 +29,19 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: addiw a0, a0, -1 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsll.vv v11, v8, v9 -; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vsll.vv v10, v8, v9 +; CHECK-NEXT: vsra.vv v9, v10, v9 ; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: lui a0, 524288 +; CHECK-NEXT: addiw a0, a0, -1 +; CHECK-NEXT: vmv.v.x v11, a0 ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, 1 ; CHECK-NEXT: slli a0, a0, 31 -; CHECK-NEXT: vmerge.vxm v8, v10, a0, v0 +; CHECK-NEXT: vmerge.vxm v8, v11, a0, v0 ; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %tmp = call <4 x i32> @llvm.sshl.sat.v4i32(<4 x i32> %x, <4 x i32> %y) ret <4 x i32> %tmp @@ -50,17 +50,17 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { ; CHECK-LABEL: vec_v8i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, 8 -; CHECK-NEXT: addiw a1, a0, -1 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: vsll.vv v10, v8, v9 ; CHECK-NEXT: vsra.vv v9, v10, v9 -; CHECK-NEXT: vmsne.vv v8, v8, v9 -; CHECK-NEXT: vmv.v.x v9, a1 -; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0 -; CHECK-NEXT: vmv.v.v v0, v8 -; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0 +; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: lui a0, 8 +; CHECK-NEXT: addiw a1, a0, -1 +; CHECK-NEXT: vmsle.vi v0, v8, -1 +; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 +; CHECK-NEXT: vmv.v.v v0, v9 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %tmp = call <8 x i16> @llvm.sshl.sat.v8i16(<8 x i16> %x, <8 x i16> %y) ret <8 x i16> %tmp @@ -69,17 +69,17 @@ define <16 x i8> @vec_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { ; CHECK-LABEL: vec_v16i8: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 127 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v10, a0 -; CHECK-NEXT: vsll.vv v11, v8, v9 -; CHECK-NEXT: vsra.vv v9, v11, v9 +; CHECK-NEXT: vsll.vv v10, v8, v9 +; CHECK-NEXT: vsra.vv v9, v10, v9 ; CHECK-NEXT: vmsne.vv v9, v8, v9 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vmv.v.x v11, a0 ; CHECK-NEXT: vmsle.vi v0, v8, -1 ; CHECK-NEXT: li a0, 128 -; CHECK-NEXT: vmerge.vxm v8, v10, a0, v0 +; CHECK-NEXT: vmerge.vxm v8, v11, a0, v0 ; CHECK-NEXT: vmv.v.v v0, v9 -; CHECK-NEXT: vmerge.vvm v8, v11, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 ; CHECK-NEXT: ret %tmp = call <16 x i8> @llvm.sshl.sat.v16i8(<16 x i8> %x, <16 x i8> %y) ret <16 x i8> %tmp Index: llvm/test/CodeGen/RISCV/rvv/stepvector.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -580,8 +580,8 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 1 ; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vid.v v8 ; RV32-NEXT: vadd.vv v8, v8, v8 @@ -591,11 +591,11 @@ ; ; RV64-LABEL: add_stepvector_nxv16i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 1 -; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV64-NEXT: vid.v v8 ; RV64-NEXT: vadd.vv v8, v8, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 ; RV64-NEXT: vadd.vx v16, v8, a0 ; RV64-NEXT: ret entry: Index: llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir +++ llvm/test/CodeGen/RISCV/rvv/subregister-undef-early-clobber.mir @@ -17,7 +17,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_1 - ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 undef %5, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -29,7 +29,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm1_0 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 undef %0, killed %6, 0, 0, 5/* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M4 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -53,7 +53,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_0 - ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 undef %5, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -65,7 +65,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm1_1 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M4 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -89,7 +89,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_3 - ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 undef %5, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -101,7 +101,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm1_2 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M4 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -125,7 +125,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_2 - ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 undef %5, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -137,7 +137,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm1_3 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M4 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -159,7 +159,7 @@ ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 - ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 undef %5, killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -171,7 +171,7 @@ %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm2_0 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M4 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -193,7 +193,7 @@ ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm4 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 - ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm4 = PseudoVRGATHER_VI_M4 undef %5, killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M4 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -205,7 +205,7 @@ %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm4 = INSERT_SUBREG %1:vrm4, %5, %subreg.sub_vrm2_1 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm4 = PseudoVRGATHER_VI_M4 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M4 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -232,7 +232,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_1 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -244,7 +244,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_0 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -270,7 +270,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_0 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -282,7 +282,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_1 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -308,7 +308,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_3 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -320,7 +320,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_2 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -346,7 +346,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_2 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -358,7 +358,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_3 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -384,7 +384,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_3 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_5 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -396,7 +396,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_4 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -422,7 +422,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_3 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_4 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -434,7 +434,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_5 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -460,7 +460,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_2 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_7 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -472,7 +472,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_6 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -498,7 +498,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_2 ; CHECK-NEXT: [[PseudoRVVInitUndefM1_:%[0-9]+]]:vr = PseudoRVVInitUndefM1 ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoRVVInitUndefM1_]], %subreg.sub_vrm1_6 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG3]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -510,7 +510,7 @@ %5:vr = PseudoVLE32_V_M1 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm1_7 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -534,7 +534,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_1 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -546,7 +546,7 @@ %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm2_0 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -570,7 +570,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_0 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -582,7 +582,7 @@ %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm2_1 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -606,7 +606,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_3 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -618,7 +618,7 @@ %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm2_2 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -642,7 +642,7 @@ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 ; CHECK-NEXT: [[PseudoRVVInitUndefM2_:%[0-9]+]]:vrm2 = PseudoRVVInitUndefM2 ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoRVVInitUndefM2_]], %subreg.sub_vrm2_2 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG2]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -654,7 +654,7 @@ %5:vrm2 = PseudoVLE32_V_M2 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm2_3 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -676,7 +676,7 @@ ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_1 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -688,7 +688,7 @@ %5:vrm4 = PseudoVLE32_V_M4 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm4_0 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 @@ -710,7 +710,7 @@ ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoRVVInitUndefM4_:%[0-9]+]]:vrm4 = PseudoRVVInitUndefM4 ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrm8 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoRVVInitUndefM4_]], %subreg.sub_vrm4_0 - ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber %5:vrm8 = PseudoVRGATHER_VI_M8 undef %5, killed [[INSERT_SUBREG1]], 0, 0, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI $x0, 0 ; CHECK-NEXT: PseudoVSE32_V_M8 killed %5, killed [[ADDI1]], 0, 5 /* e32 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0 @@ -722,7 +722,7 @@ %5:vrm4 = PseudoVLE32_V_M4 %pt, killed %7:gpr, 0, 5, 0 %6:vrm8 = INSERT_SUBREG %1:vrm8, %5, %subreg.sub_vrm4_1 dead $x0 = PseudoVSETIVLI 0, 210 /* e32, m4, ta, ma */, implicit-def $vl, implicit-def $vtype - early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 killed %6, 0, 0, 5 /* e32 */, implicit $vl, implicit $vtype + early-clobber %0:vrm8 = PseudoVRGATHER_VI_M8 undef %0, killed %6, 0, 0, 5 /* e32 */, 0, implicit $vl, implicit $vtype %2:gpr = ADDI $x0, 0 PseudoVSE32_V_M8 killed %0, killed %2, 0, 5 /* e32 */, implicit $vl, implicit $vtype %3:gpr = COPY $x0 Index: llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -17,12 +17,12 @@ ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vmerge.vim v8, v8, 1, v0 +; RV32-NEXT: vadd.vi v12, v11, -16 ; RV32-NEXT: lui a0, 16 ; RV32-NEXT: addi a0, a0, -256 ; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV32-NEXT: vadd.vi v12, v11, -16 ; RV32-NEXT: vrgather.vv v9, v8, v12, v0.t ; RV32-NEXT: vmsne.vi v9, v9, 0 ; RV32-NEXT: vadd.vi v12, v11, 1 @@ -45,12 +45,12 @@ ; RV64-NEXT: vslidedown.vi v0, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vmerge.vim v8, v8, 1, v0 +; RV64-NEXT: vadd.vi v12, v11, -16 ; RV64-NEXT: lui a0, 16 ; RV64-NEXT: addiw a0, a0, -256 ; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu -; RV64-NEXT: vadd.vi v12, v11, -16 ; RV64-NEXT: vrgather.vv v9, v8, v12, v0.t ; RV64-NEXT: vmsne.vi v9, v9, 0 ; RV64-NEXT: vadd.vi v12, v11, 1 @@ -107,14 +107,16 @@ define {<2 x i64>, <2 x i64>} @vector_deinterleave_v2i64_v4i64(<4 x i64> %vec) { ; CHECK-LABEL: vector_deinterleave_v2i64_v4i64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v8, 2 +; CHECK-NEXT: vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vrgather.vi v9, v8, 1 -; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t -; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: vrgather.vi v10, v12, 1, v0.t +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: vmv.v.v v9, v10 ; CHECK-NEXT: ret %retval = call {<2 x i64>, <2 x i64>} @llvm.experimental.vector.deinterleave2.v4i64(<4 x i64> %vec) ret {<2 x i64>, <2 x i64>} %retval @@ -194,14 +196,16 @@ define {<2 x double>, <2 x double>} @vector_deinterleave_v2f64_v4f64(<4 x double> %vec) { ; CHECK-LABEL: vector_deinterleave_v2f64_v4f64: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vrgather.vi v10, v8, 1 ; CHECK-NEXT: vsetivli zero, 2, e64, m2, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v8, 2 +; CHECK-NEXT: vslidedown.vi v12, v8, 2 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 2 ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu -; CHECK-NEXT: vrgather.vi v9, v8, 1 -; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t -; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: vrgather.vi v10, v12, 1, v0.t +; CHECK-NEXT: vslideup.vi v8, v12, 1 +; CHECK-NEXT: vmv.v.v v9, v10 ; CHECK-NEXT: ret %retval = call {<2 x double>, <2 x double>} @llvm.experimental.vector.deinterleave2.v4f64(<4 x double> %vec) ret {<2 x double>, <2 x double>} %retval Index: llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -99,29 +99,26 @@ ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: vl8re64.v v16, (a1) ; CHECK-NEXT: vl8re64.v v24, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: vadd.vv v0, v8, v8 -; CHECK-NEXT: vrgather.vv v8, v24, v0 -; CHECK-NEXT: vrgather.vv v24, v16, v0 +; CHECK-NEXT: vrgather.vv v8, v16, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vrgather.vv v8, v24, v0 ; CHECK-NEXT: vadd.vi v0, v0, 1 ; CHECK-NEXT: vrgather.vv v24, v16, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vrgather.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb @@ -130,7 +127,10 @@ ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v12, v24 -; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v20, v24 ; CHECK-NEXT: csrr a0, vlenb Index: llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -91,24 +91,39 @@ define {, } @vector_deinterleave_nxv64i1_nxv128i1( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv64i1_nxv128i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv1r.v v28, v8 +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v16, v8, 1, v0 +; CHECK-NEXT: vmv.v.i v24, 0 +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: vmerge.vim v16, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 0 +; CHECK-NEXT: vnsrl.wi v12, v16, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv1r.v v0, v28 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v24, v24, 1, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v28, v8, 0 +; CHECK-NEXT: vnsrl.wi v8, v24, 0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v24, v16, 8 -; CHECK-NEXT: vnsrl.wi v28, v8, 8 +; CHECK-NEXT: vnsrl.wi v4, v16, 8 +; CHECK-NEXT: vnsrl.wi v0, v24, 8 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vmsne.vi v8, v24, 0 +; CHECK-NEXT: vmsne.vi v8, v0, 0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv128i1( %vec) ret {, } %retval @@ -119,10 +134,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v24, 0 ; CHECK-NEXT: vnsrl.wi v12, v16, 0 -; CHECK-NEXT: vnsrl.wi v0, v24, 8 +; CHECK-NEXT: vnsrl.wi v8, v24, 0 ; CHECK-NEXT: vnsrl.wi v4, v16, 8 +; CHECK-NEXT: vnsrl.wi v0, v24, 8 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv128i8( %vec) @@ -134,10 +149,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v24, 0 ; CHECK-NEXT: vnsrl.wi v12, v16, 0 -; CHECK-NEXT: vnsrl.wi v0, v24, 16 +; CHECK-NEXT: vnsrl.wi v8, v24, 0 ; CHECK-NEXT: vnsrl.wi v4, v16, 16 +; CHECK-NEXT: vnsrl.wi v0, v24, 16 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv64i16( %vec) @@ -152,8 +167,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wx v20, v24, a0 ; CHECK-NEXT: vnsrl.wx v16, v8, a0 -; CHECK-NEXT: vnsrl.wi v0, v8, 0 ; CHECK-NEXT: vnsrl.wi v4, v24, 0 +; CHECK-NEXT: vnsrl.wi v0, v8, 0 ; CHECK-NEXT: vmv8r.v v8, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv32i32( %vec) @@ -218,26 +233,16 @@ ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vrgather.vv v16, v24, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv4r.v v24, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v28, v16 -; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v20, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 @@ -350,10 +355,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v8, v24, 0 ; CHECK-NEXT: vnsrl.wi v12, v16, 0 -; CHECK-NEXT: vnsrl.wi v0, v24, 16 +; CHECK-NEXT: vnsrl.wi v8, v24, 0 ; CHECK-NEXT: vnsrl.wi v4, v16, 16 +; CHECK-NEXT: vnsrl.wi v0, v24, 16 ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv64f16( %vec) @@ -368,8 +373,8 @@ ; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wx v20, v24, a0 ; CHECK-NEXT: vnsrl.wx v16, v8, a0 -; CHECK-NEXT: vnsrl.wi v0, v8, 0 ; CHECK-NEXT: vnsrl.wi v4, v24, 0 +; CHECK-NEXT: vnsrl.wi v0, v8, 0 ; CHECK-NEXT: vmv8r.v v8, v0 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv32f32( %vec) @@ -434,26 +439,16 @@ ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vrgather.vv v16, v24, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv4r.v v24, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v12, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v28, v16 -; CHECK-NEXT: vmv8r.v v16, v24 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v20, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 Index: llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir +++ llvm/test/CodeGen/RISCV/rvv/vmv-copy.mir @@ -176,12 +176,13 @@ ; CHECK-NEXT: $x15 = PseudoVSETIVLI 4, 73 /* e16, m2, ta, mu */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: $v26m2 = PseudoVLE16_V_M2 undef $v26m2, killed $x16, $noreg, 4 /* e16 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v8m2 = PseudoVLE16_V_M2 undef $v8m2, killed $x17, $noreg, 4 /* e16 */, 0 /* tu, mu */, implicit $vl, implicit $vtype - ; CHECK-NEXT: early-clobber $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2, $noreg, 4 /* e16 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: early-clobber $v28m4 = PseudoVWADD_VV_M2 undef $v28m4, $v26m2, $v8m2, $noreg, 4 /* e16 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v12m2 = VMV2R_V $v28m2 $x15 = PseudoVSETIVLI 4, 73, implicit-def $vl, implicit-def $vtype $v26m2 = PseudoVLE16_V_M2 undef $v26m2, killed $x16, $noreg, 4, 0, implicit $vl, implicit $vtype $v8m2 = PseudoVLE16_V_M2 undef $v8m2, killed $x17, $noreg, 4, 0, implicit $vl, implicit $vtype - $v28m4 = PseudoVWADD_VV_M2 $v26m2, $v8m2, $noreg, 4, implicit $vl, implicit $vtype + + $v28m4 = PseudoVWADD_VV_M2 undef $v28m4, $v26m2, $v8m2, $noreg, 4, 0, implicit $vl, implicit $vtype $v12m2 = COPY $v28m2 ... --- Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir @@ -206,13 +206,15 @@ ; CHECK-NEXT: bb.1.if.then: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: PseudoBR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.if.else: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %pt3:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 %pt3, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.if.end: ; CHECK-NEXT: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2 @@ -233,11 +235,13 @@ PseudoBR %bb.1 bb.1.if.then: - %1:vr = PseudoVADD_VV_M1 %0, %6, %7, 6 + %pt2:vr = IMPLICIT_DEF + %1:vr = PseudoVADD_VV_M1 %pt2, %0, %6, %7, 6, 0 PseudoBR %bb.3 bb.2.if.else: - %2:vr = PseudoVSUB_VV_M1 %0, %6, %7, 6 + %pt3:vr = IMPLICIT_DEF + %2:vr = PseudoVSUB_VV_M1 %pt3, %0, %6, %7, 6, 0 bb.3.if.end: %3:vr = PHI %1, %bb.1, %2, %bb.2 @@ -371,15 +375,17 @@ ; CHECK-NEXT: bb.1.if.then: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt, [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: PseudoBR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.if.else: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY1]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 %pt2, [[COPY1]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.if.end: ; CHECK-NEXT: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2 @@ -399,11 +405,13 @@ PseudoBR %bb.1 bb.1.if.then: - %0:vr = PseudoVADD_VV_M1 %4, %5, %6, 6 + %pt:vr = IMPLICIT_DEF + %0:vr = PseudoVADD_VV_M1 %pt, %4, %5, %6, 6, 0 PseudoBR %bb.3 bb.2.if.else: - %1:vr = PseudoVSUB_VV_M1 %5, %5, %6, 6 + %pt2:vr = IMPLICIT_DEF + %1:vr = PseudoVSUB_VV_M1 %pt2, %5, %5, %6, 6, 0 bb.3.if.end: %2:vr = PHI %0, %bb.1, %1, %bb.2 @@ -452,13 +460,15 @@ ; CHECK-NEXT: bb.1.if.then: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt, [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: PseudoBR %bb.3 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.2.if.else: ; CHECK-NEXT: successors: %bb.3(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVSUB_VV_M1_:%[0-9]+]]:vr = PseudoVSUB_VV_M1 %pt2, [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3.if.end: ; CHECK-NEXT: [[PHI:%[0-9]+]]:vr = PHI [[PseudoVADD_VV_M1_]], %bb.1, [[PseudoVSUB_VV_M1_]], %bb.2 @@ -478,11 +488,13 @@ PseudoBR %bb.1 bb.1.if.then: - %1:vr = PseudoVADD_VV_M1 %5, %6, %0, 6 + %pt:vr = IMPLICIT_DEF + %1:vr = PseudoVADD_VV_M1 %pt, %5, %6, %0, 6, 0 PseudoBR %bb.3 bb.2.if.else: - %2:vr = PseudoVSUB_VV_M1 %5, %6, %0, 6 + %pt2:vr = IMPLICIT_DEF + %2:vr = PseudoVSUB_VV_M1 %pt2, %5, %6, %0, 6, 0 bb.3.if.end: %3:vr = PHI %1, %bb.1, %2, %bb.2 @@ -542,8 +554,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.3: ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[DEF]], %bb.1, [[LWU]], %bb.2 + ; CHECK-NEXT: %pt3:vr = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 215 /* e32, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl - ; CHECK-NEXT: [[PseudoVADD_VX_MF2_:%[0-9]+]]:vr = nsw PseudoVADD_VX_MF2 [[PseudoVLE32_V_MF2_MASK]], [[PHI]], -1, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVADD_VX_MF2_:%[0-9]+]]:vr = nsw PseudoVADD_VX_MF2 %pt3, [[PseudoVLE32_V_MF2_MASK]], [[PHI]], -1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v0 = COPY [[PseudoVADD_VX_MF2_]] ; CHECK-NEXT: PseudoRET implicit $v0 bb.0: @@ -576,7 +589,8 @@ bb.3: %10:gpr = PHI %2, %bb.1, %9, %bb.2 - %11:vr = nsw PseudoVADD_VX_MF2 %6, %10, -1, 5 + %pt3:vr = IMPLICIT_DEF + %11:vr = nsw PseudoVADD_VX_MF2 %pt3, %6, %10, -1, 5, 0 $v0 = COPY %11 PseudoRET implicit $v0 ... @@ -614,7 +628,8 @@ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, %10, %bb.1 - ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 [[PseudoVID_V_M1_]], [[PHI]], -1, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 %pt2, [[PseudoVID_V_M1_]], [[PHI]], -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[PHI]], [[SRLI]] ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[MUL]] ; CHECK-NEXT: PseudoVSE32_V_MF2 killed [[PseudoVADD_VX_M1_]], killed [[ADD]], -1, 5 /* e32 */, implicit $vl, implicit $vtype @@ -638,7 +653,8 @@ successors: %bb.1, %bb.2 %6:gpr = PHI %5:gpr, %bb.0, %10:gpr, %bb.1 - %7:vr = PseudoVADD_VX_M1 %4:vr, %6:gpr, -1, 6 + %pt2:vr = IMPLICIT_DEF + %7:vr = PseudoVADD_VX_M1 %pt2, %4:vr, %6:gpr, -1, 6, 0 %8:gpr = MUL %6:gpr, %2:gpr %9:gpr = ADD %0:gpr, %8:gpr PseudoVSE32_V_MF2 killed %7:vr, killed %9:gpr, -1, 5 @@ -684,7 +700,8 @@ ; CHECK-NEXT: successors: %bb.2(0x80000000) ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, %10, %bb.2 - ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 [[PseudoVID_V_M1_]], [[PHI]], -1, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 %pt2, [[PseudoVID_V_M1_]], [[PHI]], -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[MUL:%[0-9]+]]:gpr = MUL [[PHI]], [[SRLI]] ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY]], [[MUL]] ; CHECK-NEXT: PseudoVSE32_V_MF2 killed [[PseudoVADD_VX_M1_]], killed [[ADD]], -1, 5 /* e32 */, implicit $vl, implicit $vtype @@ -712,7 +729,8 @@ successors: %bb.3 %6:gpr = PHI %5:gpr, %bb.0, %10:gpr, %bb.3 - %7:vr = PseudoVADD_VX_M1 %4:vr, %6:gpr, -1, 6 + %pt2:vr = IMPLICIT_DEF + %7:vr = PseudoVADD_VX_M1 %pt2, %4:vr, %6:gpr, -1, 6, 0 %8:gpr = MUL %6:gpr, %2:gpr %9:gpr = ADD %0:gpr, %8:gpr PseudoVSE32_V_MF2 killed %7:vr, killed %9:gpr, -1, 5 @@ -787,7 +805,8 @@ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:vr = PHI [[COPY3]], %bb.0, %16, %bb.1 ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF ; CHECK-NEXT: [[PseudoVLE32_V_M1_:%[0-9]+]]:vr = PseudoVLE32_V_M1 %pt, [[PHI]], 4, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.lsr.iv12, align 4) - ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE32_V_M1_]], [[PHI2]], 4, 5 /* e32 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, killed [[PseudoVLE32_V_M1_]], [[PHI2]], 4, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = nsw ADDI [[PHI1]], -4 ; CHECK-NEXT: [[ADDI1:%[0-9]+]]:gpr = ADDI [[PHI]], 16 ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0 @@ -823,7 +842,8 @@ %2:vr = PHI %10, %bb.0, %16, %bb.1 %pt:vr = IMPLICIT_DEF %14:vr = PseudoVLE32_V_M1 %pt, %0, 4, 5, 0 :: (load (s128) from %ir.lsr.iv12, align 4) - %16:vr = PseudoVADD_VV_M1 killed %14, %2, 4, 5 + %pt2:vr = IMPLICIT_DEF + %16:vr = PseudoVADD_VV_M1 %pt2, killed %14, %2, 4, 5, 0 %4:gpr = nsw ADDI %1, -4 %5:gpr = ADDI %0, 16 %18:gpr = COPY $x0 @@ -950,8 +970,9 @@ ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[PHI:%[0-9]+]]:gpr = PHI [[COPY3]], %bb.0, %12, %bb.3 ; CHECK-NEXT: [[ADD:%[0-9]+]]:gpr = ADD [[COPY2]], [[PHI]] + ; CHECK-NEXT: %pta:vr = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl - ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 [[PseudoVID_V_M1_]], killed [[ADD]], -1, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVADD_VX_M1_:%[0-9]+]]:vr = PseudoVADD_VX_M1 %pta, [[PseudoVID_V_M1_]], killed [[ADD]], -1, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[PseudoVMSLTU_VX_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VX_M1 [[PseudoVADD_VX_M1_]], [[COPY1]], -1, 6 /* e64 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[PseudoVCPOP_M_B1_:%[0-9]+]]:gpr = PseudoVCPOP_M_B1 [[PseudoVMSLTU_VX_M1_]], -1, 0 /* e8 */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY $x0 @@ -964,8 +985,9 @@ ; CHECK-NEXT: [[ADD1:%[0-9]+]]:gpr = ADD %src, [[PHI]] ; CHECK-NEXT: %pt2:vrnov0 = IMPLICIT_DEF ; CHECK-NEXT: [[PseudoVLE8_V_MF8_:%[0-9]+]]:vrnov0 = PseudoVLE8_V_MF8 %pt2, killed [[ADD1]], -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %ptb:vr = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETVLIX0 killed $x0, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype, implicit $vl - ; CHECK-NEXT: [[PseudoVADD_VI_MF8_:%[0-9]+]]:vrnov0 = PseudoVADD_VI_MF8 [[PseudoVLE8_V_MF8_]], 4, -1, 3 /* e8 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVADD_VI_MF8_:%[0-9]+]]:vrnov0 = PseudoVADD_VI_MF8 %ptb, [[PseudoVLE8_V_MF8_]], 4, -1, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: [[ADD2:%[0-9]+]]:gpr = ADD %dst, [[PHI]] ; CHECK-NEXT: PseudoVSE8_V_MF8 killed [[PseudoVADD_VI_MF8_]], killed [[ADD2]], -1, 3 /* e8 */, implicit $vl, implicit $vtype ; CHECK-NEXT: {{ $}} @@ -1000,7 +1022,8 @@ %26:gpr = PHI %59, %bb.0, %28, %bb.3 %61:gpr = ADD %12, %26 - %27:vr = PseudoVADD_VX_M1 %10, killed %61, -1, 6 + %pta:vr = IMPLICIT_DEF + %27:vr = PseudoVADD_VX_M1 %pta, %10, killed %61, -1, 6, 0 %62:vr = PseudoVMSLTU_VX_M1 %27, %11, -1, 6 %63:gpr = PseudoVCPOP_M_B1 %62, -1, 0 %64:gpr = COPY $x0 @@ -1013,7 +1036,8 @@ %66:gpr = ADD %src, %26 %pt2:vrnov0 = IMPLICIT_DEF %67:vrnov0 = PseudoVLE8_V_MF8 %pt2, killed %66, -1, 3, 0 - %76:vrnov0 = PseudoVADD_VI_MF8 %67, 4, -1, 3 + %ptb:vr = IMPLICIT_DEF + %76:vrnov0 = PseudoVADD_VI_MF8 %ptb, %67, 4, -1, 3, 0 %77:gpr = ADD %dst, %26 PseudoVSE8_V_MF8 killed %76, killed %77, -1, 3 Index: llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir +++ llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir @@ -121,14 +121,16 @@ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnox0 = COPY $x10 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8 + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt, [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]] ; CHECK-NEXT: PseudoRET implicit $v8 %2:gprnox0 = COPY $x10 %1:vr = COPY $v9 %0:vr = COPY $v8 - %3:vr = PseudoVADD_VV_M1 %0, %1, %2, 6 + %pt:vr = IMPLICIT_DEF + %3:vr = PseudoVADD_VV_M1 %pt, %0, %1, %2, 6, 0 $v8 = COPY %3 PseudoRET implicit $v8 @@ -163,7 +165,8 @@ ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY2]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype - ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]] ; CHECK-NEXT: PseudoRET implicit $v8 %2:gprnox0 = COPY $x11 @@ -171,7 +174,8 @@ %0:gpr = COPY $x10 %pt:vr = IMPLICIT_DEF %3:vr = PseudoVLE64_V_M1 %pt, %0, %2, 6, 0 - %4:vr = PseudoVADD_VV_M1 killed %3, %1, %2, 6 + %pt2:vr = IMPLICIT_DEF + %4:vr = PseudoVADD_VV_M1 %pt2, killed %3, %1, %2, 6, 0 $v8 = COPY %4 PseudoRET implicit $v8 @@ -277,7 +281,8 @@ ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 2, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.x) ; CHECK-NEXT: [[PseudoVLE64_V_M1_1:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt2, [[COPY]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype :: (load (s128) from %ir.y) - ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], killed [[PseudoVLE64_V_M1_1]], 2, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %pt3:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt3, killed [[PseudoVLE64_V_M1_]], killed [[PseudoVLE64_V_M1_1]], 2, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: PseudoVSE64_V_M1 killed [[PseudoVADD_VV_M1_]], [[COPY1]], 2, 6 /* e64 */, implicit $vl, implicit $vtype :: (store (s128) into %ir.x) ; CHECK-NEXT: PseudoRET %1:gpr = COPY $x11 @@ -286,7 +291,8 @@ %pt2:vr = IMPLICIT_DEF %2:vr = PseudoVLE64_V_M1 %pt, %0, 2, 6, 0 :: (load (s128) from %ir.x) %3:vr = PseudoVLE64_V_M1 %pt2, %1, 2, 6, 0 :: (load (s128) from %ir.y) - %4:vr = PseudoVADD_VV_M1 killed %2, killed %3, 2, 6 + %pt3:vr = IMPLICIT_DEF + %4:vr = PseudoVADD_VV_M1 %pt3, killed %2, killed %3, 2, 6, 0 PseudoVSE64_V_M1 killed %4, %0, 2, 6 :: (store (s128) into %ir.x) PseudoRET @@ -365,14 +371,16 @@ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr = COPY $v9 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vr = COPY $v8 ; CHECK-NEXT: $x0 = PseudoVSETVLI [[COPY]], 88 /* e64, m1, ta, mu */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: %pt:vr = IMPLICIT_DEF + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt, [[COPY2]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]] ; CHECK-NEXT: PseudoRET implicit $v8 %2:gprnox0 = COPY $x10 %1:vr = COPY $v9 %0:vr = COPY $v8 %3:gprnox0 = PseudoVSETVLI %2, 88, implicit-def dead $vl, implicit-def dead $vtype - %4:vr = PseudoVADD_VV_M1 %0, %1, killed %3, 6 + %pt:vr = IMPLICIT_DEF + %4:vr = PseudoVADD_VV_M1 %pt, %0, %1, killed %3, 6, 0 $v8 = COPY %4 PseudoRET implicit $v8 @@ -408,8 +416,9 @@ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 %pt, [[COPY2]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */ + ; CHECK-NEXT: %pt2:vr = IMPLICIT_DEF ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype - ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, implicit $vl, implicit $vtype + ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 %pt2, killed [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype ; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]] ; CHECK-NEXT: PseudoRET implicit $v8 %2:gprnox0 = COPY $x11 @@ -418,7 +427,8 @@ %pt:vr = IMPLICIT_DEF %3:vr = PseudoVLE64_V_M1 %pt, %0, %2, 6, 0 INLINEASM &"", 1 /* sideeffect attdialect */ - %4:vr = PseudoVADD_VV_M1 killed %3, %1, %2, 6 + %pt2:vr = IMPLICIT_DEF + %4:vr = PseudoVADD_VV_M1 %pt2, killed %3, %1, %2, 6, 0 $v8 = COPY %4 PseudoRET implicit $v8 Index: llvm/test/CodeGen/RISCV/rvv/vxrm.mir =================================================================== --- llvm/test/CodeGen/RISCV/rvv/vxrm.mir +++ llvm/test/CodeGen/RISCV/rvv/vxrm.mir @@ -1,4 +1,3 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 # RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -stop-after prologepilog -o - %s | FileCheck %s --check-prefix=MIR # RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=ASM @@ -14,7 +13,7 @@ ; MIR-NEXT: {{ $}} ; MIR-NEXT: dead $x0 = PseudoVSETVLI renamable $x10, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype ; MIR-NEXT: WriteVXRMImm 0, implicit-def $vxrm - ; MIR-NEXT: renamable $v8 = PseudoVAADD_VV_MF8 renamable $v8, renamable $v9, 0, $noreg, 3 /* e8 */, implicit $vl, implicit $vtype, implicit $vxrm + ; MIR-NEXT: renamable $v8 = PseudoVAADD_VV_MF8 undef $v8, renamable $v8, renamable $v9, 0, $noreg, 3 /* e8 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit $vxrm ; MIR-NEXT: PseudoRET implicit $v8 ; ASM-LABEL: verify_vxrm: ; ASM: # %bb.0: @@ -25,6 +24,7 @@ %0:vr = COPY $v8 %1:vr = COPY $v9 dead $x0 = PseudoVSETVLI killed renamable $x10, 197 /* e8, mf8, ta, ma */, implicit-def $vl, implicit-def $vtype - renamable $v8 = PseudoVAADD_VV_MF8 killed renamable $v8, killed renamable $v9, 0, $noreg, 3 /* e8 */ + %pt:vr = IMPLICIT_DEF + renamable $v8 = PseudoVAADD_VV_MF8 %pt, killed renamable $v8, killed renamable $v9, 0, $noreg, 3 /* e8 */, 0 PseudoRET implicit $v8 ... Index: llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll =================================================================== --- llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -722,21 +722,21 @@ ; ; RV64MV-LABEL: test_srem_vec: ; RV64MV: # %bb.0: -; RV64MV-NEXT: ld a1, 0(a0) +; RV64MV-NEXT: lbu a1, 12(a0) ; RV64MV-NEXT: lwu a2, 8(a0) -; RV64MV-NEXT: srli a3, a1, 2 -; RV64MV-NEXT: lbu a4, 12(a0) -; RV64MV-NEXT: slli a5, a2, 62 -; RV64MV-NEXT: or a3, a5, a3 -; RV64MV-NEXT: srai a3, a3, 31 -; RV64MV-NEXT: slli a4, a4, 32 +; RV64MV-NEXT: slli a1, a1, 32 +; RV64MV-NEXT: ld a3, 0(a0) +; RV64MV-NEXT: or a1, a2, a1 +; RV64MV-NEXT: slli a1, a1, 29 +; RV64MV-NEXT: srai a1, a1, 31 +; RV64MV-NEXT: srli a4, a3, 2 +; RV64MV-NEXT: slli a2, a2, 62 ; RV64MV-NEXT: or a2, a2, a4 -; RV64MV-NEXT: slli a2, a2, 29 ; RV64MV-NEXT: lui a4, %hi(.LCPI3_0) ; RV64MV-NEXT: ld a4, %lo(.LCPI3_0)(a4) ; RV64MV-NEXT: srai a2, a2, 31 -; RV64MV-NEXT: slli a1, a1, 31 -; RV64MV-NEXT: srai a1, a1, 31 +; RV64MV-NEXT: slli a3, a3, 31 +; RV64MV-NEXT: srai a3, a3, 31 ; RV64MV-NEXT: mulh a4, a2, a4 ; RV64MV-NEXT: srli a5, a4, 63 ; RV64MV-NEXT: srai a4, a4, 1 @@ -744,27 +744,27 @@ ; RV64MV-NEXT: lui a5, %hi(.LCPI3_1) ; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5) ; RV64MV-NEXT: add a2, a2, a4 -; RV64MV-NEXT: slli a4, a4, 2 -; RV64MV-NEXT: add a2, a2, a4 -; RV64MV-NEXT: mulh a4, a3, a5 -; RV64MV-NEXT: srli a5, a4, 63 -; RV64MV-NEXT: srai a4, a4, 1 -; RV64MV-NEXT: add a4, a4, a5 -; RV64MV-NEXT: lui a5, %hi(.LCPI3_2) -; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5) -; RV64MV-NEXT: add a3, a3, a4 ; RV64MV-NEXT: slli a4, a4, 3 -; RV64MV-NEXT: sub a3, a3, a4 -; RV64MV-NEXT: mulh a4, a1, a5 +; RV64MV-NEXT: sub a2, a2, a4 +; RV64MV-NEXT: mulh a4, a3, a5 ; RV64MV-NEXT: srli a5, a4, 63 ; RV64MV-NEXT: add a4, a4, a5 ; RV64MV-NEXT: li a5, 6 ; RV64MV-NEXT: mul a4, a4, a5 -; RV64MV-NEXT: sub a1, a1, a4 +; RV64MV-NEXT: sub a3, a3, a4 +; RV64MV-NEXT: lui a4, %hi(.LCPI3_2) +; RV64MV-NEXT: ld a4, %lo(.LCPI3_2)(a4) ; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64MV-NEXT: vslide1down.vx v8, v8, a1 ; RV64MV-NEXT: vslide1down.vx v8, v8, a3 ; RV64MV-NEXT: vslide1down.vx v8, v8, a2 +; RV64MV-NEXT: mulh a2, a1, a4 +; RV64MV-NEXT: srli a3, a2, 63 +; RV64MV-NEXT: srai a2, a2, 1 +; RV64MV-NEXT: add a2, a2, a3 +; RV64MV-NEXT: slli a3, a2, 2 +; RV64MV-NEXT: add a1, a1, a2 +; RV64MV-NEXT: add a1, a1, a3 +; RV64MV-NEXT: vslide1down.vx v8, v8, a1 ; RV64MV-NEXT: vslidedown.vi v8, v8, 1 ; RV64MV-NEXT: lui a1, %hi(.LCPI3_3) ; RV64MV-NEXT: addi a1, a1, %lo(.LCPI3_3)