diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -275,6 +275,8 @@
     return;
   }
 
+  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
   // FPR->FPR copies and VR->VR copies.
   unsigned Opc;
   bool IsScalableVector = true;
@@ -303,6 +305,14 @@
   } else if (RISCV::VRRegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::VMV1R_V;
     LMul = RISCVII::LMUL_1;
+  } else if (RISCV::VRIgnoreGroupRegClass.contains(DstReg) &&
+             RISCV::VRRegClass.contains(SrcReg)) {
+    Opc = RISCV::VMV1R_V;
+    LMul = RISCVII::LMUL_1;
+  } else if (RISCV::VRRegClass.contains(DstReg) &&
+             RISCV::VRIgnoreGroupRegClass.contains(SrcReg)) {
+    Opc = RISCV::VMV1R_V;
+    LMul = RISCVII::LMUL_1;
   } else if (RISCV::VRM2RegClass.contains(DstReg, SrcReg)) {
     Opc = RISCV::VMV2R_V;
     LMul = RISCVII::LMUL_2;
@@ -368,6 +378,28 @@
     NF = 8;
     LMul = RISCVII::LMUL_1;
   } else {
+    if (RISCV::VRIgnoreGroupRegClass.contains(DstReg) &&
+        (RISCV::VRM2RegClass.contains(SrcReg) ||
+         RISCV::VRM4RegClass.contains(SrcReg) ||
+         RISCV::VRM8RegClass.contains(SrcReg))) {
+      auto SubSrcReg = TRI->getSubReg(SrcReg, RISCV::sub_vrm1_0);
+      if (SubSrcReg != DstReg)
+        BuildMI(MBB, MBBI, DL, get(RISCV::VMV1R_V))
+            .addReg(DstReg, RegState::Define)
+            .addReg(SubSrcReg);
+      return;
+    }
+    if (RISCV::VRIgnoreGroupRegClass.contains(SrcReg) &&
+        (RISCV::VRM2RegClass.contains(DstReg) ||
+         RISCV::VRM4RegClass.contains(DstReg) ||
+         RISCV::VRM8RegClass.contains(DstReg))) {
+      auto SubDstReg = TRI->getSubReg(DstReg, RISCV::sub_vrm1_0);
+      if (SubDstReg != SrcReg)
+        BuildMI(MBB, MBBI, DL, get(RISCV::VMV1R_V))
+            .addReg(SubDstReg, RegState::Define)
+            .addReg(SrcReg);
+      return;
+    }
     llvm_unreachable("Impossible reg-to-reg copy");
   }
 
@@ -421,8 +453,6 @@
       MIB.addReg(RISCV::VTYPE, RegState::Implicit);
     }
   } else {
-    const TargetRegisterInfo *TRI = STI.getRegisterInfo();
-
     int I = 0, End = NF, Incr = 1;
     unsigned SrcEncoding = TRI->getEncodingValue(SrcReg);
     unsigned DstEncoding = TRI->getEncodingValue(DstReg);
@@ -489,6 +519,8 @@
     IsScalableVector = false;
   } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VS1R_V;
+  } else if (RISCV::VRIgnoreGroupRegClass.hasSubClassEq(RC)) {
+    Opcode = RISCV::VS1R_V;
   } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VS2R_V;
   } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
@@ -572,6 +604,8 @@
     IsScalableVector = false;
   } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VL1RE8_V;
+  } else if (RISCV::VRIgnoreGroupRegClass.hasSubClassEq(RC)) {
+    Opcode = RISCV::VL1RE8_V;
   } else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
     Opcode = RISCV::VL2RE8_V;
   } else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -5648,13 +5648,13 @@
   let VLMul = m.value in {
     let HasSEWOp = 1, BaseInstr = VMV_X_S in
     def PseudoVMV_X_S # "_" # mx:
-      Pseudo<(outs GPR:$rd), (ins m.vrclass:$rs2, ixlenimm:$sew), []>,
+      Pseudo<(outs GPR:$rd), (ins VRIgnoreGroup:$rs2, ixlenimm:$sew), []>,
       Sched<[WriteVIMovVX_MX, ReadVIMovVX_MX]>,
       RISCVVPseudo;
     let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VMV_S_X,
        Constraints = "$rd = $rs1" in
-    def PseudoVMV_S_X # "_" # mx: Pseudo<(outs m.vrclass:$rd),
-                                          (ins m.vrclass:$rs1, GPR:$rs2,
+    def PseudoVMV_S_X # "_" # mx: Pseudo<(outs VRIgnoreGroup:$rd),
+                                          (ins VRIgnoreGroup:$rs1, GPR:$rs2,
                                               AVL:$vl, ixlenimm:$sew), []>,
    Sched<[WriteVIMovXV_MX, ReadVIMovXV_MX, ReadVIMovXX_MX]>,
@@ -5682,14 +5682,14 @@
       let HasSEWOp = 1, BaseInstr = VFMV_F_S in
       def "PseudoVFMV_" # f.FX # "_S_" # mx :
                                           Pseudo<(outs f.fprclass:$rd),
-                                                 (ins m.vrclass:$rs2, ixlenimm:$sew), []>,
+                                                 (ins VRIgnoreGroup:$rs2, ixlenimm:$sew), []>,
              Sched<[WriteVFMovVF_MX, ReadVFMovVF_MX]>,
              RISCVVPseudo;
       let HasVLOp = 1, HasSEWOp = 1, BaseInstr = VFMV_S_F,
           Constraints = "$rd = $rs1" in
       def "PseudoVFMV_S_" # f.FX # "_" # mx :
-                                          Pseudo<(outs m.vrclass:$rd),
-                                                 (ins m.vrclass:$rs1, f.fprclass:$rs2,
+                                          Pseudo<(outs VRIgnoreGroup:$rd),
+                                                 (ins VRIgnoreGroup:$rs1, f.fprclass:$rs2,
                                                   AVL:$vl, ixlenimm:$sew), []>,
              Sched<[WriteVFMovFV_MX, ReadVFMovFV_MX, ReadVFMovFX_MX]>,
@@ -6242,10 +6242,10 @@
 foreach fvti = AllFloatVectors in {
   defvar instr = !cast("PseudoVFMV_"#fvti.ScalarSuffix#"_S_" # fvti.LMul.MX);
-  def : Pat<(fvti.Scalar (int_riscv_vfmv_f_s (fvti.Vector fvti.RegClass:$rs2))),
+  def : Pat<(fvti.Scalar (int_riscv_vfmv_f_s (fvti.Vector VRIgnoreGroup:$rs2))),
             (instr $rs2, fvti.Log2SEW)>;
-  def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
+  def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector VRIgnoreGroup:$rs1),
                                               (fvti.Scalar fvti.ScalarRegClass:$rs2),
                                               VLOpFrag)),
             (!cast("PseudoVFMV_S_"#fvti.ScalarSuffix#"_" # fvti.LMul.MX)
@@ -6253,7 +6253,7 @@
                 (fvti.Scalar fvti.ScalarRegClass:$rs2),
                 GPR:$vl, fvti.Log2SEW)>;
-  def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector fvti.RegClass:$rs1),
+  def : Pat<(fvti.Vector (int_riscv_vfmv_s_f (fvti.Vector VRIgnoreGroup:$rs1),
                                               (fvti.Scalar (fpimm0)), VLOpFrag)),
             (!cast("PseudoVMV_S_X_" # fvti.LMul.MX)
                 (fvti.Vector $rs1), X0, GPR:$vl, fvti.Log2SEW)>;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -1837,11 +1837,11 @@
 // 17.1. Integer Scalar Move Instructions
 // 17.4. Vector Register Gather Instruction
 foreach vti = AllIntegerVectors in {
-  def : Pat<(vti.Vector (riscv_vmv_s_x_vl (vti.Vector vti.RegClass:$merge),
+  def : Pat<(vti.Vector (riscv_vmv_s_x_vl (vti.Vector VRIgnoreGroup:$merge),
                                            vti.ScalarRegClass:$rs1,
                                            VLOpFrag)),
             (!cast("PseudoVMV_S_X_"#vti.LMul.MX)
-                vti.RegClass:$merge,
+                VRIgnoreGroup:$merge,
                 (vti.Scalar vti.ScalarRegClass:$rs1), GPR:$vl, vti.Log2SEW)>;
   def : Pat<(vti.Vector (riscv_vrgather_vv_vl vti.RegClass:$rs2,
@@ -1895,16 +1895,16 @@
 // 17.2. Floating-Point Scalar Move Instructions
 foreach vti = AllFloatVectors in {
-  def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$merge),
+  def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector VRIgnoreGroup:$merge),
                                             (vti.Scalar (fpimm0)),
                                             VLOpFrag)),
             (!cast("PseudoVMV_S_X_"#vti.LMul.MX)
-                vti.RegClass:$merge, X0, GPR:$vl, vti.Log2SEW)>;
-  def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector vti.RegClass:$merge),
+                VRIgnoreGroup:$merge, X0, GPR:$vl, vti.Log2SEW)>;
+  def : Pat<(vti.Vector (riscv_vfmv_s_f_vl (vti.Vector VRIgnoreGroup:$merge),
                                             vti.ScalarRegClass:$rs1,
                                             VLOpFrag)),
             (!cast("PseudoVFMV_S_"#vti.ScalarSuffix#"_"#vti.LMul.MX)
-                vti.RegClass:$merge,
+                VRIgnoreGroup:$merge,
                 (vti.Scalar vti.ScalarRegClass:$rs1),
                 GPR:$vl, vti.Log2SEW)>;
   defvar ivti = GetIntVTypeInfo.Vti;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -516,6 +516,10 @@
               (add (sequence "V%u", 8, 31),
                    (sequence "V%u", 1, 7)), 1>;
 
+def VRIgnoreGroup : VReg;
+
 def VRM2 : VReg;
diff --git a/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll b/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll
--- a/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/combine-splats.ll
@@ -60,12 +60,15 @@
 ; CHECK-LABEL: combine_vec_shl_shl:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a0, 2
-; CHECK-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vsetvli a1, zero, e32, m2, tu, ma
 ; CHECK-NEXT:    vmv.s.x v10, a0
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vmv.s.x v12, a0
-; CHECK-NEXT:    vsll.vv v8, v8, v10
+; CHECK-NEXT:    vmv.s.x v11, a0
+; CHECK-NEXT:    vmv1r.v v12, v10
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
 ; CHECK-NEXT:    vsll.vv v8, v8, v12
+; CHECK-NEXT:    vmv1r.v v10, v11
+; CHECK-NEXT:    vsll.vv v8, v8, v10
 ; CHECK-NEXT:    ret
   %ins1 = insertelement poison, i32 2, i32 0
   %splat1 = shufflevector %ins1, poison, zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll
@@ -516,9 +516,9 @@
 define float @extractelt_fadd_nxv4f32_splat( %x) {
 ; CHECK-LABEL: extractelt_fadd_nxv4f32_splat:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; CHECK-NEXT:    lui a0, %hi(.LCPI47_0)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI47_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 2
 ; CHECK-NEXT:    vfmv.f.s ft1, v8
 ; CHECK-NEXT:    fadd.s fa0, ft1, ft0
@@ -533,9 +533,9 @@
 define float @extractelt_fsub_nxv4f32_splat( %x) {
 ; CHECK-LABEL: extractelt_fsub_nxv4f32_splat:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; CHECK-NEXT:    lui a0, %hi(.LCPI48_0)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI48_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 1
 ; CHECK-NEXT:    vfmv.f.s ft1, v8
 ; CHECK-NEXT:    fsub.s fa0, ft0, ft1
@@ -550,9 +550,9 @@
 define float @extractelt_fmul_nxv4f32_splat( %x) {
 ; CHECK-LABEL: extractelt_fmul_nxv4f32_splat:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; CHECK-NEXT:    lui a0, %hi(.LCPI49_0)
 ; CHECK-NEXT:    flw ft0, %lo(.LCPI49_0)(a0)
-; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v8, v8, 3
 ; CHECK-NEXT:    vfmv.f.s ft1, v8
 ; CHECK-NEXT:    fmul.s fa0, ft1, ft0
diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll
---
a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv32.ll @@ -640,9 +640,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v10, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -654,9 +655,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v10, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -681,9 +683,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v12, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -695,9 +698,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m4, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v12, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx @@ -722,9 +726,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v16, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 2 @@ -736,9 +741,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a0 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsrl.vx v16, v8, a0 ; CHECK-NEXT: vmv.x.s a0, v8 -; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsrl.vx v8, v8, a1 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: ret %r = extractelement %v, i32 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -159,9 +159,10 @@ ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 3 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v10, v8, a0 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, v8, a1 +; RV32-NEXT: vmv1r.v v8, v10 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; @@ -229,8 +230,8 @@ ; RV32-NEXT: vle32.v v8, (a0) ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v10, v8, 4 -; RV32-NEXT: vmv.x.s a0, v10 ; RV32-NEXT: vslidedown.vi v8, v8, 5 +; RV32-NEXT: vmv.x.s a0, v10 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; @@ -421,9 +422,10 @@ ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: li a0, 32 +; RV32-NEXT: vsrl.vx v10, v8, a0 ; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsrl.vx v8, 
v8, a1 +; RV32-NEXT: vmv1r.v v8, v10 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; @@ -503,9 +505,9 @@ ; RV32-NEXT: add a1, a1, a1 ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vx v10, v8, a1 -; RV32-NEXT: vmv.x.s a0, v10 ; RV32-NEXT: addi a1, a1, 1 ; RV32-NEXT: vslidedown.vx v8, v8, a1 +; RV32-NEXT: vmv.x.s a0, v10 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll @@ -328,7 +328,8 @@ ; RV32-NEXT: fld ft0, %lo(.LCPI12_0)(a0) ; RV32-NEXT: lui a0, %hi(.LCPI12_1) ; RV32-NEXT: fld ft1, %lo(.LCPI12_1)(a0) -; RV32-NEXT: vfmv.f.s ft2, v8 +; RV32-NEXT: vmv1r.v v12, v8 +; RV32-NEXT: vfmv.f.s ft2, v12 ; RV32-NEXT: fmax.d ft3, ft2, ft0 ; RV32-NEXT: fmin.d ft3, ft3, ft1 ; RV32-NEXT: fcvt.w.d a0, ft3, rtz @@ -347,8 +348,8 @@ ; RV32-NEXT: seqz a2, a2 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: sb a0, 15(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 6 +; RV32-NEXT: sb a0, 15(sp) ; RV32-NEXT: vfmv.f.s ft2, v12 ; RV32-NEXT: fmax.d ft3, ft2, ft0 ; RV32-NEXT: fmin.d ft3, ft3, ft1 @@ -357,8 +358,8 @@ ; RV32-NEXT: seqz a2, a2 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: sb a0, 14(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 5 +; RV32-NEXT: sb a0, 14(sp) ; RV32-NEXT: vfmv.f.s ft2, v12 ; RV32-NEXT: fmax.d ft3, ft2, ft0 ; RV32-NEXT: fmin.d ft3, ft3, ft1 @@ -367,8 +368,8 @@ ; RV32-NEXT: seqz a2, a2 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: sb a0, 13(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 4 +; RV32-NEXT: sb a0, 13(sp) ; RV32-NEXT: vfmv.f.s ft2, v12 ; RV32-NEXT: fmax.d ft3, ft2, ft0 ; RV32-NEXT: fmin.d ft3, ft3, ft1 @@ -377,8 +378,8 @@ ; RV32-NEXT: seqz a2, a2 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: sb a0, 12(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 3 +; RV32-NEXT: sb a0, 12(sp) ; RV32-NEXT: vfmv.f.s ft2, v12 ; RV32-NEXT: fmax.d ft3, ft2, ft0 ; RV32-NEXT: fmin.d ft3, ft3, ft1 @@ -387,8 +388,8 @@ ; RV32-NEXT: seqz a2, a2 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: sb a0, 11(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 2 +; RV32-NEXT: sb a0, 11(sp) ; RV32-NEXT: vfmv.f.s ft2, v12 ; RV32-NEXT: fmax.d ft3, ft2, ft0 ; RV32-NEXT: fmin.d ft3, ft3, ft1 @@ -397,8 +398,8 @@ ; RV32-NEXT: seqz a2, a2 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: sb a0, 10(sp) ; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: sb a0, 10(sp) ; RV32-NEXT: vfmv.f.s ft2, v8 ; RV32-NEXT: fmax.d ft0, ft2, ft0 ; RV32-NEXT: fmin.d ft0, ft0, ft1 @@ -425,7 +426,8 @@ ; RV64-NEXT: fld ft0, %lo(.LCPI12_0)(a0) ; RV64-NEXT: lui a0, %hi(.LCPI12_1) ; RV64-NEXT: fld ft1, %lo(.LCPI12_1)(a0) -; RV64-NEXT: vfmv.f.s ft2, v8 +; RV64-NEXT: vmv1r.v v12, v8 +; RV64-NEXT: vfmv.f.s ft2, v12 ; RV64-NEXT: fmax.d ft3, ft2, ft0 ; RV64-NEXT: fmin.d ft3, ft3, ft1 ; RV64-NEXT: fcvt.l.d a0, ft3, rtz @@ -444,8 +446,8 @@ ; RV64-NEXT: seqz a2, a2 ; RV64-NEXT: addi a2, a2, -1 ; RV64-NEXT: and a0, a2, a0 -; RV64-NEXT: sb a0, 15(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 6 +; RV64-NEXT: sb a0, 15(sp) ; RV64-NEXT: vfmv.f.s ft2, v12 ; RV64-NEXT: fmax.d ft3, ft2, ft0 ; RV64-NEXT: fmin.d ft3, ft3, ft1 @@ -454,8 +456,8 @@ ; RV64-NEXT: seqz a2, a2 ; RV64-NEXT: addi a2, a2, -1 ; RV64-NEXT: and a0, a2, a0 -; RV64-NEXT: sb a0, 14(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 5 +; RV64-NEXT: sb a0, 
14(sp) ; RV64-NEXT: vfmv.f.s ft2, v12 ; RV64-NEXT: fmax.d ft3, ft2, ft0 ; RV64-NEXT: fmin.d ft3, ft3, ft1 @@ -464,8 +466,8 @@ ; RV64-NEXT: seqz a2, a2 ; RV64-NEXT: addi a2, a2, -1 ; RV64-NEXT: and a0, a2, a0 -; RV64-NEXT: sb a0, 13(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 4 +; RV64-NEXT: sb a0, 13(sp) ; RV64-NEXT: vfmv.f.s ft2, v12 ; RV64-NEXT: fmax.d ft3, ft2, ft0 ; RV64-NEXT: fmin.d ft3, ft3, ft1 @@ -474,8 +476,8 @@ ; RV64-NEXT: seqz a2, a2 ; RV64-NEXT: addi a2, a2, -1 ; RV64-NEXT: and a0, a2, a0 -; RV64-NEXT: sb a0, 12(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 3 +; RV64-NEXT: sb a0, 12(sp) ; RV64-NEXT: vfmv.f.s ft2, v12 ; RV64-NEXT: fmax.d ft3, ft2, ft0 ; RV64-NEXT: fmin.d ft3, ft3, ft1 @@ -484,8 +486,8 @@ ; RV64-NEXT: seqz a2, a2 ; RV64-NEXT: addi a2, a2, -1 ; RV64-NEXT: and a0, a2, a0 -; RV64-NEXT: sb a0, 11(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 2 +; RV64-NEXT: sb a0, 11(sp) ; RV64-NEXT: vfmv.f.s ft2, v12 ; RV64-NEXT: fmax.d ft3, ft2, ft0 ; RV64-NEXT: fmin.d ft3, ft3, ft1 @@ -494,8 +496,8 @@ ; RV64-NEXT: seqz a2, a2 ; RV64-NEXT: addi a2, a2, -1 ; RV64-NEXT: and a0, a2, a0 -; RV64-NEXT: sb a0, 10(sp) ; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: sb a0, 10(sp) ; RV64-NEXT: vfmv.f.s ft2, v8 ; RV64-NEXT: fmax.d ft0, ft2, ft0 ; RV64-NEXT: fmin.d ft0, ft0, ft1 @@ -526,9 +528,10 @@ ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vmv1r.v v12, v8 ; RV32-NEXT: lui a0, %hi(.LCPI13_0) ; RV32-NEXT: fld ft0, %lo(.LCPI13_0)(a0) -; RV32-NEXT: vfmv.f.s ft1, v8 +; RV32-NEXT: vfmv.f.s ft1, v12 ; RV32-NEXT: fcvt.d.w ft2, zero ; RV32-NEXT: fmax.d ft1, ft1, ft2 ; RV32-NEXT: fmin.d ft1, ft1, ft0 @@ -540,38 +543,38 @@ ; RV32-NEXT: fmax.d ft1, ft1, ft2 ; RV32-NEXT: fmin.d ft1, ft1, ft0 ; RV32-NEXT: fcvt.wu.d a0, ft1, rtz -; RV32-NEXT: sb a0, 15(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 6 +; RV32-NEXT: sb a0, 15(sp) ; RV32-NEXT: vfmv.f.s ft1, v12 ; RV32-NEXT: fmax.d ft1, ft1, ft2 ; RV32-NEXT: fmin.d ft1, ft1, ft0 ; RV32-NEXT: fcvt.wu.d a0, ft1, rtz -; RV32-NEXT: sb a0, 14(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 5 +; RV32-NEXT: sb a0, 14(sp) ; RV32-NEXT: vfmv.f.s ft1, v12 ; RV32-NEXT: fmax.d ft1, ft1, ft2 ; RV32-NEXT: fmin.d ft1, ft1, ft0 ; RV32-NEXT: fcvt.wu.d a0, ft1, rtz -; RV32-NEXT: sb a0, 13(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 4 +; RV32-NEXT: sb a0, 13(sp) ; RV32-NEXT: vfmv.f.s ft1, v12 ; RV32-NEXT: fmax.d ft1, ft1, ft2 ; RV32-NEXT: fmin.d ft1, ft1, ft0 ; RV32-NEXT: fcvt.wu.d a0, ft1, rtz -; RV32-NEXT: sb a0, 12(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 3 +; RV32-NEXT: sb a0, 12(sp) ; RV32-NEXT: vfmv.f.s ft1, v12 ; RV32-NEXT: fmax.d ft1, ft1, ft2 ; RV32-NEXT: fmin.d ft1, ft1, ft0 ; RV32-NEXT: fcvt.wu.d a0, ft1, rtz -; RV32-NEXT: sb a0, 11(sp) ; RV32-NEXT: vslidedown.vi v12, v8, 2 +; RV32-NEXT: sb a0, 11(sp) ; RV32-NEXT: vfmv.f.s ft1, v12 ; RV32-NEXT: fmax.d ft1, ft1, ft2 ; RV32-NEXT: fmin.d ft1, ft1, ft0 ; RV32-NEXT: fcvt.wu.d a0, ft1, rtz -; RV32-NEXT: sb a0, 10(sp) ; RV32-NEXT: vslidedown.vi v8, v8, 1 +; RV32-NEXT: sb a0, 10(sp) ; RV32-NEXT: vfmv.f.s ft1, v8 ; RV32-NEXT: fmax.d ft1, ft1, ft2 ; RV32-NEXT: fmin.d ft0, ft1, ft0 @@ -590,9 +593,10 @@ ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vmv1r.v v12, v8 ; RV64-NEXT: lui a0, %hi(.LCPI13_0) ; RV64-NEXT: fld ft0, %lo(.LCPI13_0)(a0) -; RV64-NEXT: vfmv.f.s ft1, v8 +; RV64-NEXT: vfmv.f.s ft1, v12 ; RV64-NEXT: fmv.d.x ft2, zero ; RV64-NEXT: fmax.d ft1, ft1, ft2 ; RV64-NEXT: fmin.d ft1, ft1, ft0 
@@ -604,38 +608,38 @@ ; RV64-NEXT: fmax.d ft1, ft1, ft2 ; RV64-NEXT: fmin.d ft1, ft1, ft0 ; RV64-NEXT: fcvt.lu.d a0, ft1, rtz -; RV64-NEXT: sb a0, 15(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 6 +; RV64-NEXT: sb a0, 15(sp) ; RV64-NEXT: vfmv.f.s ft1, v12 ; RV64-NEXT: fmax.d ft1, ft1, ft2 ; RV64-NEXT: fmin.d ft1, ft1, ft0 ; RV64-NEXT: fcvt.lu.d a0, ft1, rtz -; RV64-NEXT: sb a0, 14(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 5 +; RV64-NEXT: sb a0, 14(sp) ; RV64-NEXT: vfmv.f.s ft1, v12 ; RV64-NEXT: fmax.d ft1, ft1, ft2 ; RV64-NEXT: fmin.d ft1, ft1, ft0 ; RV64-NEXT: fcvt.lu.d a0, ft1, rtz -; RV64-NEXT: sb a0, 13(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 4 +; RV64-NEXT: sb a0, 13(sp) ; RV64-NEXT: vfmv.f.s ft1, v12 ; RV64-NEXT: fmax.d ft1, ft1, ft2 ; RV64-NEXT: fmin.d ft1, ft1, ft0 ; RV64-NEXT: fcvt.lu.d a0, ft1, rtz -; RV64-NEXT: sb a0, 12(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 3 +; RV64-NEXT: sb a0, 12(sp) ; RV64-NEXT: vfmv.f.s ft1, v12 ; RV64-NEXT: fmax.d ft1, ft1, ft2 ; RV64-NEXT: fmin.d ft1, ft1, ft0 ; RV64-NEXT: fcvt.lu.d a0, ft1, rtz -; RV64-NEXT: sb a0, 11(sp) ; RV64-NEXT: vslidedown.vi v12, v8, 2 +; RV64-NEXT: sb a0, 11(sp) ; RV64-NEXT: vfmv.f.s ft1, v12 ; RV64-NEXT: fmax.d ft1, ft1, ft2 ; RV64-NEXT: fmin.d ft1, ft1, ft0 ; RV64-NEXT: fcvt.lu.d a0, ft1, rtz -; RV64-NEXT: sb a0, 10(sp) ; RV64-NEXT: vslidedown.vi v8, v8, 1 +; RV64-NEXT: sb a0, 10(sp) ; RV64-NEXT: vfmv.f.s ft1, v8 ; RV64-NEXT: fmax.d ft1, ft1, ft2 ; RV64-NEXT: fmin.d ft0, ft1, ft0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll @@ -108,8 +108,9 @@ ; CHECK-LABEL: insertelt_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, m4, tu, ma @@ -126,8 +127,9 @@ ; CHECK-LABEL: insertelt_idx_v64i1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, 64 -; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: addi a0, a1, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -22,8 +22,8 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: vmv.s.x v10, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, ma +; RV64-NEXT: vmv.s.x v10, a1 ; RV64-NEXT: vslideup.vi v8, v10, 3 ; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: ret @@ -94,6 +94,7 @@ ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vsetvli zero, zero, e16, m4, tu, ma ; RV32-NEXT: vmv.s.x v12, a1 ; RV32-NEXT: addi a1, a2, 1 ; RV32-NEXT: vsetvli zero, a1, e16, m4, tu, ma @@ -107,6 +108,7 @@ ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vsetvli zero, zero, e16, m4, tu, ma ; RV64-NEXT: vmv.s.x v12, a1 ; RV64-NEXT: slli a2, a2, 32 ; RV64-NEXT: srli a2, a2, 32 @@ -127,6 +129,7 @@ 
; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) +; RV32-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV32-NEXT: vfmv.s.f v10, fa0 ; RV32-NEXT: addi a2, a1, 1 ; RV32-NEXT: vsetvli zero, a2, e32, m2, tu, ma @@ -139,6 +142,7 @@ ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64-NEXT: vfmv.s.f v10, fa0 ; RV64-NEXT: slli a1, a1, 32 ; RV64-NEXT: srli a1, a1, 32 @@ -176,6 +180,7 @@ ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: li a2, -1 +; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; RV32-NEXT: vmv.s.x v12, a2 ; RV32-NEXT: addi a2, a1, 1 ; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma @@ -189,6 +194,7 @@ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a2, -1 +; RV64-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; RV64-NEXT: vmv.s.x v12, a2 ; RV64-NEXT: slli a1, a1, 32 ; RV64-NEXT: srli a1, a1, 32 @@ -226,6 +232,7 @@ ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vle64.v v8, (a0) ; RV32-NEXT: li a2, 6 +; RV32-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; RV32-NEXT: vmv.s.x v12, a2 ; RV32-NEXT: addi a2, a1, 1 ; RV32-NEXT: vsetvli zero, a2, e64, m4, tu, ma @@ -239,6 +246,7 @@ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: li a2, 6 +; RV64-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; RV64-NEXT: vmv.s.x v12, a2 ; RV64-NEXT: slli a1, a1, 32 ; RV64-NEXT: srli a1, a1, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -203,8 +203,9 @@ ; RV64-LABEL: vrgather_shuffle_vv_v8i64: ; RV64: # %bb.0: ; RV64-NEXT: li a0, 5 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV64-NEXT: vsetivli zero, 8, e64, m4, tu, ma ; RV64-NEXT: vmv.s.x v16, a0 +; RV64-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; RV64-NEXT: vmv.v.i v20, 2 ; RV64-NEXT: vsetvli zero, zero, e64, m4, tu, ma ; RV64-NEXT: vslideup.vi v20, v16, 7 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -4654,8 +4654,9 @@ ; LMULMAX2-RV32-NEXT: vmulhu.vv v10, v8, v10 ; LMULMAX2-RV32-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV32-NEXT: lui a1, 524288 +; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; LMULMAX2-RV32-NEXT: vmv.s.x v12, a1 -; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; LMULMAX2-RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; LMULMAX2-RV32-NEXT: vmv.v.i v14, 0 ; LMULMAX2-RV32-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; LMULMAX2-RV32-NEXT: vslideup.vi v14, v12, 5 @@ -4677,7 +4678,9 @@ ; LMULMAX2-RV64-NEXT: vle64.v v8, (a0) ; LMULMAX2-RV64-NEXT: li a1, -1 ; LMULMAX2-RV64-NEXT: slli a1, a1, 63 +; LMULMAX2-RV64-NEXT: vsetvli zero, zero, e64, m2, tu, ma ; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1 +; LMULMAX2-RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vmv.v.i v12, 0 ; LMULMAX2-RV64-NEXT: vsetivli zero, 3, e64, m2, tu, ma ; LMULMAX2-RV64-NEXT: vslideup.vi v12, v10, 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll 
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -2512,7 +2512,7 @@ ; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 @@ -2521,7 +2521,7 @@ ; RV64ZVE32F-NEXT: .LBB34_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2 @@ -2530,7 +2530,7 @@ ; RV64ZVE32F-NEXT: .LBB34_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3 @@ -2539,7 +2539,7 @@ ; RV64ZVE32F-NEXT: .LBB34_13: # %cond.load10 ; RV64ZVE32F-NEXT: ld a2, 32(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4 @@ -2548,7 +2548,7 @@ ; RV64ZVE32F-NEXT: .LBB34_14: # %cond.load13 ; RV64ZVE32F-NEXT: ld a2, 40(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5 @@ -2557,7 +2557,7 @@ ; RV64ZVE32F-NEXT: .LBB34_15: # %cond.load16 ; RV64ZVE32F-NEXT: ld a2, 48(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6 @@ -2566,9 +2566,8 @@ ; RV64ZVE32F-NEXT: .LBB34_16: # %cond.load19 ; RV64ZVE32F-NEXT: ld a0, 56(a0) ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7 ; RV64ZVE32F-NEXT: ret %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru) @@ -2606,8 +2605,10 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB35_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_4 @@ -2618,8 +2619,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 
; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 @@ -2632,7 +2634,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -2654,8 +2656,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB35_10: # %else14 @@ -2676,8 +2679,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -2688,8 +2692,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -2700,8 +2705,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -2713,9 +2719,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -2755,8 +2760,10 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB36_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_4 @@ -2767,8 +2774,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, 
v9 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB36_4: # %else2 @@ -2781,7 +2789,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -2803,8 +2811,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB36_10: # %else14 @@ -2825,8 +2834,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -2837,8 +2847,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -2849,8 +2860,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -2862,9 +2874,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -2906,8 +2917,10 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB37_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_4 @@ -2919,8 +2932,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v 
v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB37_4: # %else2 @@ -2934,7 +2948,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -2957,8 +2971,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB37_10: # %else14 @@ -2980,8 +2995,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -2993,8 +3009,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -3006,8 +3023,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -3020,9 +3038,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -3064,8 +3081,10 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB38_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_4 @@ -3076,8 +3095,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: 
vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB38_4: # %else2 @@ -3090,7 +3110,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -3112,8 +3132,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB38_10: # %else14 @@ -3134,8 +3155,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -3146,8 +3168,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -3158,8 +3181,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -3171,9 +3195,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -3214,8 +3237,10 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB39_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_4 @@ -3226,8 +3251,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; 
RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB39_4: # %else2 @@ -3240,7 +3266,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -3262,8 +3288,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB39_10: # %else14 @@ -3284,8 +3311,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -3296,8 +3324,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -3308,8 +3337,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -3321,9 +3351,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -3368,8 +3397,10 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vmv.s.x v10, a3 +; RV64ZVE32F-NEXT: vmv.s.x v9, a3 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB40_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 ; RV64ZVE32F-NEXT: beqz a3, .LBB40_4 @@ -3381,8 +3412,9 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a3 
+; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB40_4: # %else2 @@ -3396,7 +3428,7 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a3 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -3419,8 +3451,9 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a3 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB40_10: # %else14 @@ -3442,8 +3475,9 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a3 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 @@ -3455,8 +3489,9 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a3 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 @@ -3468,8 +3503,9 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a3 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a3 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a2, a2, -128 @@ -3482,9 +3518,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -3537,6 +3572,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -3563,6 +3599,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 @@ -3582,7 +3619,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw 
a2, 0(a2) -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -3594,6 +3633,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 @@ -3605,7 +3645,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 @@ -3617,7 +3657,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -3629,8 +3671,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -4099,8 +4141,9 @@ ; RV32ZVE32F-NEXT: andi a2, t0, 1 ; RV32ZVE32F-NEXT: beqz a2, .LBB47_9 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a2, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -4346,8 +4389,9 @@ ; RV32ZVE32F-NEXT: andi a1, t0, 1 ; RV32ZVE32F-NEXT: beqz a1, .LBB48_9 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -4625,8 +4669,9 @@ ; RV32ZVE32F-NEXT: andi a1, t0, 1 ; RV32ZVE32F-NEXT: beqz a1, .LBB49_9 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -4905,8 +4950,9 @@ ; RV32ZVE32F-NEXT: andi a1, t0, 1 ; RV32ZVE32F-NEXT: beqz a1, .LBB50_9 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -5193,8 +5239,9 @@ ; RV32ZVE32F-NEXT: andi a1, t0, 1 ; RV32ZVE32F-NEXT: beqz a1, .LBB51_9 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; 
RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -5473,8 +5520,9 @@ ; RV32ZVE32F-NEXT: andi a1, t0, 1 ; RV32ZVE32F-NEXT: beqz a1, .LBB52_9 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -5754,8 +5802,9 @@ ; RV32ZVE32F-NEXT: andi a1, t0, 1 ; RV32ZVE32F-NEXT: beqz a1, .LBB53_9 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -6043,8 +6092,9 @@ ; RV32ZVE32F-NEXT: andi a1, t0, 1 ; RV32ZVE32F-NEXT: beqz a1, .LBB54_9 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -6321,8 +6371,9 @@ ; RV32ZVE32F-NEXT: andi a1, t0, 1 ; RV32ZVE32F-NEXT: beqz a1, .LBB55_9 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -6600,8 +6651,9 @@ ; RV32ZVE32F-NEXT: andi a1, t0, 1 ; RV32ZVE32F-NEXT: beqz a1, .LBB56_9 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a3, v8 +; RV32ZVE32F-NEXT: vmv.x.s a3, v10 ; RV32ZVE32F-NEXT: lw a1, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: andi a4, t0, 2 @@ -6915,8 +6967,8 @@ ; RV32ZVE32F-NEXT: lw t6, 20(a3) ; RV32ZVE32F-NEXT: lw s2, 16(a3) ; RV32ZVE32F-NEXT: lw s3, 12(a3) -; RV32ZVE32F-NEXT: lw s5, 8(a3) -; RV32ZVE32F-NEXT: lw s4, 4(a3) +; RV32ZVE32F-NEXT: lw s4, 8(a3) +; RV32ZVE32F-NEXT: lw s5, 4(a3) ; RV32ZVE32F-NEXT: lw a3, 0(a3) ; RV32ZVE32F-NEXT: lw s6, 0(a2) ; RV32ZVE32F-NEXT: lw s7, 8(a2) @@ -6972,8 +7024,8 @@ ; RV32ZVE32F-NEXT: lw a5, 0(a1) ; RV32ZVE32F-NEXT: .LBB57_9: # %else20 ; RV32ZVE32F-NEXT: sw a3, 0(a0) -; RV32ZVE32F-NEXT: sw s4, 4(a0) -; RV32ZVE32F-NEXT: sw s5, 8(a0) +; RV32ZVE32F-NEXT: sw s5, 4(a0) +; RV32ZVE32F-NEXT: sw s4, 8(a0) ; RV32ZVE32F-NEXT: sw s3, 12(a0) ; RV32ZVE32F-NEXT: sw s2, 16(a0) ; RV32ZVE32F-NEXT: sw t6, 20(a0) @@ -7003,9 +7055,10 @@ ; RV32ZVE32F-NEXT: addi sp, sp, 96 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB57_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 -; RV32ZVE32F-NEXT: lw s4, 4(a2) +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 +; RV32ZVE32F-NEXT: lw s5, 4(a2) ; RV32ZVE32F-NEXT: lw a3, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: beqz a2, .LBB57_2 @@ -7014,7 +7067,7 @@ ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: lw s3, 4(a2) -; RV32ZVE32F-NEXT: lw s5, 0(a2) +; RV32ZVE32F-NEXT: lw s4, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 4 ; RV32ZVE32F-NEXT: beqz a2, .LBB57_3 ; RV32ZVE32F-NEXT: .LBB57_12: # 
%cond.load4 @@ -8450,7 +8503,7 @@ ; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 @@ -8459,7 +8512,7 @@ ; RV64ZVE32F-NEXT: .LBB73_11: # %cond.load4 ; RV64ZVE32F-NEXT: ld a2, 16(a0) ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 2 @@ -8468,7 +8521,7 @@ ; RV64ZVE32F-NEXT: .LBB73_12: # %cond.load7 ; RV64ZVE32F-NEXT: ld a2, 24(a0) ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 3 @@ -8477,7 +8530,7 @@ ; RV64ZVE32F-NEXT: .LBB73_13: # %cond.load10 ; RV64ZVE32F-NEXT: ld a2, 32(a0) ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 4 @@ -8486,7 +8539,7 @@ ; RV64ZVE32F-NEXT: .LBB73_14: # %cond.load13 ; RV64ZVE32F-NEXT: ld a2, 40(a0) ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 5 @@ -8495,7 +8548,7 @@ ; RV64ZVE32F-NEXT: .LBB73_15: # %cond.load16 ; RV64ZVE32F-NEXT: ld a2, 48(a0) ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 6 @@ -8504,9 +8557,8 @@ ; RV64ZVE32F-NEXT: .LBB73_16: # %cond.load19 ; RV64ZVE32F-NEXT: ld a0, 56(a0) ; RV64ZVE32F-NEXT: flw ft0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 7 ; RV64ZVE32F-NEXT: ret %v = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %ptrs, i32 4, <8 x i1> %m, <8 x float> %passthru) @@ -8544,8 +8596,10 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB74_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB74_4 @@ -8556,8 +8610,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; 
RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB74_4: # %else2 @@ -8570,7 +8625,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -8592,8 +8647,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB74_10: # %else14 @@ -8614,8 +8670,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -8626,8 +8683,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -8638,8 +8696,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -8651,9 +8710,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw ft0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8693,8 +8751,10 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB75_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB75_4 @@ -8705,8 +8765,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: 
vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB75_4: # %else2 @@ -8719,7 +8780,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -8741,8 +8802,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB75_10: # %else14 @@ -8763,8 +8825,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -8775,8 +8838,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -8787,8 +8851,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -8800,9 +8865,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw ft0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -8844,8 +8908,10 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB76_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB76_4 @@ -8857,8 +8923,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: 
vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB76_4: # %else2 @@ -8872,7 +8939,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -8895,8 +8962,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB76_10: # %else14 @@ -8918,8 +8986,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -8931,8 +9000,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -8944,8 +9014,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -8958,9 +9029,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw ft0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -9002,8 +9072,10 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB77_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB77_4 @@ -9014,8 +9086,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; 
RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB77_4: # %else2 @@ -9028,7 +9101,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -9050,8 +9123,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB77_10: # %else14 @@ -9072,8 +9146,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -9084,8 +9159,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -9096,8 +9172,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -9109,9 +9186,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw ft0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -9152,8 +9228,10 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB78_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB78_4 @@ -9164,8 +9242,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; 
RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB78_4: # %else2 @@ -9178,7 +9257,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -9200,8 +9279,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB78_10: # %else14 @@ -9222,8 +9302,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -9234,8 +9315,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -9246,8 +9328,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -9259,9 +9342,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw ft0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -9306,8 +9388,10 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw ft0, 0(a3) +; RV64ZVE32F-NEXT: vmv1r.v v9, v10 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma -; RV64ZVE32F-NEXT: vfmv.s.f v10, ft0 +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v10, v9 ; RV64ZVE32F-NEXT: .LBB79_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 ; RV64ZVE32F-NEXT: beqz a3, .LBB79_4 @@ -9319,8 +9403,9 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add 
a3, a0, a3 ; RV64ZVE32F-NEXT: flw ft0, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 ; RV64ZVE32F-NEXT: .LBB79_4: # %else2 @@ -9334,7 +9419,7 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw ft0, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 2 @@ -9357,8 +9442,9 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw ft0, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB79_10: # %else14 @@ -9380,8 +9466,9 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw ft0, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 3 ; RV64ZVE32F-NEXT: andi a3, a2, 16 @@ -9393,8 +9480,9 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw ft0, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 @@ -9406,8 +9494,9 @@ ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw ft0, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a2, a2, -128 @@ -9420,9 +9509,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw ft0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -9475,6 +9563,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -9501,6 +9590,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) +; RV64ZVE32F-NEXT: vsetivli 
zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 @@ -9520,7 +9610,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v14, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v14, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -9532,6 +9624,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 3 @@ -9543,7 +9636,7 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 @@ -9555,7 +9648,9 @@ ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw ft0, 0(a2) -; RV64ZVE32F-NEXT: vfmv.s.f v12, ft0 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v9, ft0 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 6 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -9567,8 +9662,8 @@ ; RV64ZVE32F-NEXT: slli a1, a1, 2 ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: flw ft0, 0(a0) -; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vfmv.s.f v8, ft0 ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 7 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 ; RV64ZVE32F-NEXT: ret @@ -9995,8 +10090,9 @@ ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB86_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: beqz a2, .LBB86_2 @@ -10191,8 +10287,9 @@ ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB87_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: beqz a2, .LBB87_2 @@ -10409,8 +10506,9 @@ ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB88_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: beqz a2, .LBB88_2 @@ -10628,8 +10726,9 @@ ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB89_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; 
RV32ZVE32F-NEXT: beqz a2, .LBB89_2 @@ -10855,8 +10954,9 @@ ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB90_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: beqz a2, .LBB90_2 @@ -11074,8 +11174,9 @@ ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB91_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: beqz a2, .LBB91_2 @@ -11294,8 +11395,9 @@ ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB92_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: beqz a2, .LBB92_2 @@ -11522,8 +11624,9 @@ ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB93_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: beqz a2, .LBB93_2 @@ -11741,8 +11844,9 @@ ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB94_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: beqz a2, .LBB94_2 @@ -11961,8 +12065,9 @@ ; RV32ZVE32F-NEXT: fsd fa7, 56(a0) ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB95_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: beqz a2, .LBB95_2 @@ -12220,8 +12325,9 @@ ; RV32ZVE32F-NEXT: addi sp, sp, 64 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB96_10: # %cond.load +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a2, v8 +; RV32ZVE32F-NEXT: vmv.x.s a2, v10 ; RV32ZVE32F-NEXT: fld fa0, 0(a2) ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: beqz a2, .LBB96_2 @@ -12671,7 +12777,7 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 1 @@ -12685,8 +12791,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v13 ; RV64ZVE32F-NEXT: vsetivli zero, 3, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 2 ; 
RV64ZVE32F-NEXT: .LBB98_6: # %else5 @@ -12707,8 +12814,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v12 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5 ; RV64ZVE32F-NEXT: .LBB98_10: # %else14 @@ -12734,8 +12842,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v13 ; RV64ZVE32F-NEXT: vsetivli zero, 10, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 9 ; RV64ZVE32F-NEXT: .LBB98_15: # %else26 @@ -12748,7 +12857,7 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 11, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 10 @@ -12764,8 +12873,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v13 ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 11 ; RV64ZVE32F-NEXT: .LBB98_19: # %else32 @@ -12778,8 +12888,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v13 ; RV64ZVE32F-NEXT: vsetivli zero, 13, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 12 ; RV64ZVE32F-NEXT: .LBB98_21: # %else35 @@ -12792,8 +12903,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v13 ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 13 ; RV64ZVE32F-NEXT: .LBB98_23: # %else38 @@ -12817,7 +12929,7 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 18, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 17 @@ -12831,8 +12943,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v12 ; RV64ZVE32F-NEXT: vsetivli zero, 19, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 18 ; RV64ZVE32F-NEXT: 
.LBB98_30: # %else53 @@ -12853,8 +12966,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v13 ; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 21 ; RV64ZVE32F-NEXT: .LBB98_34: # %else62 @@ -12880,8 +12994,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 26, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 25 ; RV64ZVE32F-NEXT: .LBB98_39: # %else74 @@ -12894,7 +13009,7 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 27, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 26 @@ -12916,8 +13031,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29 ; RV64ZVE32F-NEXT: .LBB98_45: # %else86 @@ -12930,8 +13046,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 31, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 30 ; RV64ZVE32F-NEXT: .LBB98_47: # %else89 @@ -12945,9 +13062,8 @@ ; RV64ZVE32F-NEXT: add a0, a0, a1 ; RV64ZVE32F-NEXT: lb a0, 0(a0) ; RV64ZVE32F-NEXT: li a1, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a1, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 31 ; RV64ZVE32F-NEXT: .LBB98_49: # %else92 ; RV64ZVE32F-NEXT: vmv2r.v v8, v10 @@ -12959,8 +13075,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v12 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 3 ; RV64ZVE32F-NEXT: andi a2, a1, 16 @@ -12971,8 +13088,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v12 ; RV64ZVE32F-NEXT: vsetivli zero, 5, e8, m2, tu, ma ; 
RV64ZVE32F-NEXT: vslideup.vi v10, v14, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 @@ -12983,7 +13101,7 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v14, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 7, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 6 @@ -12996,8 +13114,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v13 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 7 ; RV64ZVE32F-NEXT: andi a2, a1, 256 @@ -13008,8 +13127,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v13 ; RV64ZVE32F-NEXT: vsetivli zero, 9, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 8 ; RV64ZVE32F-NEXT: andi a2, a1, 512 @@ -13020,8 +13140,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v13 ; RV64ZVE32F-NEXT: vsetivli zero, 15, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 14 ; RV64ZVE32F-NEXT: slli a2, a1, 48 @@ -13033,7 +13154,7 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 15 @@ -13045,7 +13166,7 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 17, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 16 @@ -13059,8 +13180,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v13 ; RV64ZVE32F-NEXT: vsetivli zero, 20, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 19 ; RV64ZVE32F-NEXT: slli a2, a1, 43 @@ -13071,8 +13193,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v14, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v13, a2 +; RV64ZVE32F-NEXT: vmv1r.v v14, v13 ; RV64ZVE32F-NEXT: vsetivli zero, 21, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 20 ; RV64ZVE32F-NEXT: slli a2, a1, 42 @@ -13083,7 +13206,7 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb 
a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vsetivli zero, 23, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 22 @@ -13096,8 +13219,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 24, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 23 ; RV64ZVE32F-NEXT: slli a2, a1, 39 @@ -13108,8 +13232,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 25, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 24 ; RV64ZVE32F-NEXT: slli a2, a1, 38 @@ -13122,8 +13247,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 28, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 27 ; RV64ZVE32F-NEXT: slli a2, a1, 35 @@ -13134,8 +13260,9 @@ ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lb a2, 0(a2) ; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, ta, ma -; RV64ZVE32F-NEXT: vmv.s.x v12, a2 +; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, m2, tu, ma +; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vmv1r.v v12, v9 ; RV64ZVE32F-NEXT: vsetivli zero, 29, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 28 ; RV64ZVE32F-NEXT: slli a2, a1, 34 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -3355,8 +3355,9 @@ ; RV32ZVE32F-NEXT: .LBB41_10: # %cond.store ; RV32ZVE32F-NEXT: lw s1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 ; RV32ZVE32F-NEXT: sw s1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) ; RV32ZVE32F-NEXT: andi a0, a6, 2 @@ -3589,8 +3590,9 @@ ; RV32ZVE32F-NEXT: .LBB42_10: # %cond.store ; RV32ZVE32F-NEXT: lw s1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 ; RV32ZVE32F-NEXT: sw s1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) ; RV32ZVE32F-NEXT: andi a0, a1, 2 @@ -3836,8 +3838,9 @@ ; RV32ZVE32F-NEXT: .LBB43_10: # %cond.store ; RV32ZVE32F-NEXT: lw s1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 ; RV32ZVE32F-NEXT: sw s1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) ; RV32ZVE32F-NEXT: andi 
a0, a1, 2 @@ -4084,8 +4087,9 @@ ; RV32ZVE32F-NEXT: .LBB44_10: # %cond.store ; RV32ZVE32F-NEXT: lw s1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 ; RV32ZVE32F-NEXT: sw s1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) ; RV32ZVE32F-NEXT: andi a0, a1, 2 @@ -4340,8 +4344,9 @@ ; RV32ZVE32F-NEXT: .LBB45_10: # %cond.store ; RV32ZVE32F-NEXT: lw s1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 ; RV32ZVE32F-NEXT: sw s1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) ; RV32ZVE32F-NEXT: andi a0, a1, 2 @@ -4588,8 +4593,9 @@ ; RV32ZVE32F-NEXT: .LBB46_10: # %cond.store ; RV32ZVE32F-NEXT: lw s1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 ; RV32ZVE32F-NEXT: sw s1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) ; RV32ZVE32F-NEXT: andi a0, a1, 2 @@ -4837,8 +4843,9 @@ ; RV32ZVE32F-NEXT: .LBB47_10: # %cond.store ; RV32ZVE32F-NEXT: lw s1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 ; RV32ZVE32F-NEXT: sw s1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) ; RV32ZVE32F-NEXT: andi a0, a1, 2 @@ -5094,8 +5101,9 @@ ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store ; RV32ZVE32F-NEXT: lw s1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 ; RV32ZVE32F-NEXT: sw s1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) ; RV32ZVE32F-NEXT: andi a0, a1, 2 @@ -5342,8 +5350,9 @@ ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store ; RV32ZVE32F-NEXT: lw s1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 ; RV32ZVE32F-NEXT: sw s1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) ; RV32ZVE32F-NEXT: andi a0, a1, 2 @@ -5591,8 +5600,9 @@ ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store ; RV32ZVE32F-NEXT: lw s1, 4(a0) ; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s s2, v8 +; RV32ZVE32F-NEXT: vmv.x.s s2, v10 ; RV32ZVE32F-NEXT: sw s1, 4(s2) ; RV32ZVE32F-NEXT: sw a0, 0(s2) ; RV32ZVE32F-NEXT: andi a0, a1, 2 @@ -5897,8 +5907,9 @@ ; RV32ZVE32F-NEXT: addi sp, sp, 96 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB51_10: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: sw s5, 4(a1) ; RV32ZVE32F-NEXT: sw s4, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 @@ -8531,8 +8542,9 @@ ; RV32ZVE32F-NEXT: .LBB80_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB80_9: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB80_2 @@ -8709,8 +8721,9 @@ ; RV32ZVE32F-NEXT: .LBB81_8: # 
%else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB81_9: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB81_2 @@ -8912,8 +8925,9 @@ ; RV32ZVE32F-NEXT: .LBB82_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB82_9: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB82_2 @@ -9116,8 +9130,9 @@ ; RV32ZVE32F-NEXT: .LBB83_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB83_9: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB83_2 @@ -9328,8 +9343,9 @@ ; RV32ZVE32F-NEXT: .LBB84_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB84_9: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB84_2 @@ -9532,8 +9548,9 @@ ; RV32ZVE32F-NEXT: .LBB85_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB85_9: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB85_2 @@ -9737,8 +9754,9 @@ ; RV32ZVE32F-NEXT: .LBB86_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB86_9: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB86_2 @@ -9950,8 +9968,9 @@ ; RV32ZVE32F-NEXT: .LBB87_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB87_9: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB87_2 @@ -10154,8 +10173,9 @@ ; RV32ZVE32F-NEXT: .LBB88_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB88_9: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB88_2 @@ -10359,8 +10379,9 @@ ; RV32ZVE32F-NEXT: .LBB89_8: # %else14 ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB89_9: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB89_2 @@ -10608,8 +10629,9 @@ ; RV32ZVE32F-NEXT: addi sp, sp, 64 ; 
RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB90_10: # %cond.store +; RV32ZVE32F-NEXT: vmv1r.v v10, v8 ; RV32ZVE32F-NEXT: vsetivli zero, 0, e32, m2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s a1, v8 +; RV32ZVE32F-NEXT: vmv.x.s a1, v10 ; RV32ZVE32F-NEXT: fsd fa0, 0(a1) ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: beqz a1, .LBB90_2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -7365,7 +7365,8 @@ ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: vrgather.vi v10, v8, 1 ; RV32-NEXT: vmul.vv v8, v8, v10 -; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vmv1r.v v10, v8 +; RV32-NEXT: vmv.x.s a0, v10 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 @@ -7400,7 +7401,8 @@ ; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: vrgather.vi v12, v8, 1 ; RV32-NEXT: vmul.vv v8, v8, v12 -; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vmv1r.v v12, v8 +; RV32-NEXT: vmv.x.s a0, v12 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m4, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 @@ -7439,7 +7441,8 @@ ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vrgather.vi v16, v8, 1 ; RV32-NEXT: vmul.vv v8, v8, v16 -; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vmv1r.v v16, v8 +; RV32-NEXT: vmv.x.s a0, v16 ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetivli zero, 1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v8, v8, a1 @@ -7483,8 +7486,9 @@ ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vrgather.vi v16, v8, 1 ; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vmv1r.v v16, v8 ; RV32-NEXT: vsetivli zero, 0, e32, m8, ta, ma -; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vmv.x.s a0, v16 ; RV32-NEXT: vsetivli zero, 1, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vmv.x.s a1, v8 @@ -7536,8 +7540,9 @@ ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vrgather.vi v16, v8, 1 ; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vmv1r.v v16, v8 ; RV32-NEXT: vsetivli zero, 0, e32, m8, ta, ma -; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vmv.x.s a0, v16 ; RV32-NEXT: vsetivli zero, 1, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 1 ; RV32-NEXT: vmv.x.s a1, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -289,8 +289,9 @@ ; RV64-NEXT: .LBB6_5: # %cond.store ; RV64-NEXT: vsetivli zero, 0, e16, mf2, ta, ma ; RV64-NEXT: vmv.x.s a1, v8 +; RV64-NEXT: vmv1r.v v9, v10 ; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; RV64-NEXT: vmv.x.s a2, v10 +; RV64-NEXT: vmv.x.s a2, v9 ; RV64-NEXT: sb a1, 0(a2) ; RV64-NEXT: srli a1, a1, 8 ; RV64-NEXT: sb a1, 1(a2) @@ -302,7 +303,8 @@ ; RV64-NEXT: vmv.x.s a1, v9 ; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV64-NEXT: vslidedown.vi v12, v10, 1 -; RV64-NEXT: vmv.x.s a2, v12 +; RV64-NEXT: vmv1r.v v9, v12 +; RV64-NEXT: vmv.x.s a2, v9 ; RV64-NEXT: sb a1, 0(a2) ; RV64-NEXT: srli a1, a1, 8 ; RV64-NEXT: sb a1, 1(a2) @@ -314,7 +316,8 @@ ; RV64-NEXT: vmv.x.s a1, v9 ; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV64-NEXT: vslidedown.vi v12, v10, 2 -; RV64-NEXT: vmv.x.s a2, v12 +; RV64-NEXT: vmv1r.v v9, v12 +; RV64-NEXT: vmv.x.s a2, v9 ; RV64-NEXT: sb a1, 0(a2) ; RV64-NEXT: srli a1, a1, 8 ; RV64-NEXT: sb a1, 1(a2) diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll 
b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll @@ -112,8 +112,10 @@ define @insertelt_nxv8f16_0( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv8f16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m2, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 0 ret %r @@ -122,7 +124,7 @@ define @insertelt_nxv8f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv8f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m2, tu, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -134,7 +136,7 @@ define @insertelt_nxv8f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m2, tu, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma @@ -147,8 +149,10 @@ define @insertelt_nxv16f16_0( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv16f16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m4, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v12, fa0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 0 ret %r @@ -157,7 +161,7 @@ define @insertelt_nxv16f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv16f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m4, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 @@ -169,7 +173,7 @@ define @insertelt_nxv16f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m4, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma @@ -182,8 +186,10 @@ define @insertelt_nxv32f16_0( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv32f16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %r = insertelement %v, half %elt, i32 0 ret %r @@ -192,7 +198,7 @@ define @insertelt_nxv32f16_imm( %v, half %elt) { ; CHECK-LABEL: insertelt_nxv32f16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e16, m8, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 @@ -204,7 +210,7 @@ define @insertelt_nxv32f16_idx( %v, half %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv32f16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m8, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma @@ -287,8 +293,10 @@ define @insertelt_nxv4f32_0( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv4f32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a0, zero, e32, m2, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: 
vfmv.s.f v10, fa0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 0 ret %r @@ -297,7 +305,7 @@ define @insertelt_nxv4f32_imm( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv4f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m2, tu, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -309,7 +317,7 @@ define @insertelt_nxv4f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m2, tu, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma @@ -322,8 +330,10 @@ define @insertelt_nxv8f32_0( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv8f32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a0, zero, e32, m4, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v12, fa0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 0 ret %r @@ -332,7 +342,7 @@ define @insertelt_nxv8f32_imm( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv8f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m4, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 @@ -344,7 +354,7 @@ define @insertelt_nxv8f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m4, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma @@ -357,8 +367,10 @@ define @insertelt_nxv16f32_0( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv16f32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %r = insertelement %v, float %elt, i32 0 ret %r @@ -367,7 +379,7 @@ define @insertelt_nxv16f32_imm( %v, float %elt) { ; CHECK-LABEL: insertelt_nxv16f32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e32, m8, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 @@ -379,7 +391,7 @@ define @insertelt_nxv16f32_idx( %v, float %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16f32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m8, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma @@ -427,8 +439,10 @@ define @insertelt_nxv2f64_0( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv2f64_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a0, zero, e64, m2, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 0 ret %r @@ -437,7 +451,7 @@ define @insertelt_nxv2f64_imm( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv2f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m2, tu, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: 
vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -449,7 +463,7 @@ define @insertelt_nxv2f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv2f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, ma ; CHECK-NEXT: vfmv.s.f v10, fa0 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma @@ -462,8 +476,10 @@ define @insertelt_nxv4f64_0( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv4f64_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a0, zero, e64, m4, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v12, fa0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 0 ret %r @@ -472,7 +488,7 @@ define @insertelt_nxv4f64_imm( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv4f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m4, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 @@ -484,7 +500,7 @@ define @insertelt_nxv4f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m4, tu, ma ; CHECK-NEXT: vfmv.s.f v12, fa0 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, ma @@ -497,8 +513,10 @@ define @insertelt_nxv8f64_0( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv8f64_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %r = insertelement %v, double %elt, i32 0 ret %r @@ -507,7 +525,7 @@ define @insertelt_nxv8f64_imm( %v, double %elt) { ; CHECK-LABEL: insertelt_nxv8f64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a0, zero, e64, m8, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 @@ -519,7 +537,7 @@ define @insertelt_nxv8f64_idx( %v, double %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8f64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m8, tu, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll @@ -145,8 +145,9 @@ define @insertelt_nxv16i1( %x, i1 %elt) { ; CHECK-LABEL: insertelt_nxv16i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m2, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: vsetivli zero, 3, e8, m2, tu, ma @@ -162,8 +163,9 @@ define @insertelt_idx_nxv16i1( %x, i1 %elt, i64 %idx) { ; CHECK-LABEL: insertelt_idx_nxv16i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e8, m2, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmerge.vim v10, v10, 1, v0 ; CHECK-NEXT: addi a0, a1, 1 @@ 
-180,8 +182,9 @@ define @insertelt_nxv32i1( %x, i1 %elt) { ; CHECK-LABEL: insertelt_nxv32i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: vsetivli zero, 3, e8, m4, tu, ma @@ -197,8 +200,9 @@ define @insertelt_idx_nxv32i1( %x, i1 %elt, i64 %idx) { ; CHECK-LABEL: insertelt_idx_nxv32i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e8, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vmerge.vim v12, v12, 1, v0 ; CHECK-NEXT: addi a0, a1, 1 @@ -215,8 +219,9 @@ define @insertelt_nxv64i1( %x, i1 %elt) { ; CHECK-LABEL: insertelt_nxv64i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m8, tu, ma ; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 ; CHECK-NEXT: vsetivli zero, 3, e8, m8, tu, ma @@ -232,15 +237,16 @@ define @insertelt_idx_nxv64i1( %x, i1 %elt, i64 %idx) { ; CHECK-LABEL: insertelt_idx_nxv64i1: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a0 -; CHECK-NEXT: vmv.v.i v16, 0 -; CHECK-NEXT: vmerge.vim v16, v16, 1, v0 +; CHECK-NEXT: vsetvli a2, zero, e8, m8, tu, ma +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vsetvli zero, zero, e8, m8, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v16, v8, a1 +; CHECK-NEXT: vslideup.vx v8, v16, a1 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; CHECK-NEXT: vand.vi v8, v16, 1 +; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: ret %y = insertelement %x, i1 %elt, i64 %idx diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll @@ -145,8 +145,10 @@ define @insertelt_nxv16i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a1, zero, e8, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 0 ret %r @@ -155,7 +157,7 @@ define @insertelt_nxv16i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -167,7 +169,7 @@ define @insertelt_nxv16i8_idx( %v, i8 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv16i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e8, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma @@ -180,8 +182,10 @@ define @insertelt_nxv32i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, tu, ma -; CHECK-NEXT: 
vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 0 ret %r @@ -190,7 +194,7 @@ define @insertelt_nxv32i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 @@ -202,7 +206,7 @@ define @insertelt_nxv32i8_idx( %v, i8 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv32i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e8, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma @@ -215,8 +219,10 @@ define @insertelt_nxv64i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli a1, zero, e8, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 0 ret %r @@ -225,7 +231,7 @@ define @insertelt_nxv64i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m8, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 @@ -237,7 +243,7 @@ define @insertelt_nxv64i8_idx( %v, i8 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv64i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e8, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma @@ -355,8 +361,10 @@ define @insertelt_nxv8i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 ret %r @@ -365,7 +373,7 @@ define @insertelt_nxv8i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -377,7 +385,7 @@ define @insertelt_nxv8i16_idx( %v, i16 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv8i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma @@ -390,8 +398,10 @@ define @insertelt_nxv16i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a1, zero, e16, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 ret %r @@ -400,7 +410,7 @@ define @insertelt_nxv16i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m4, tu, ma ; CHECK-NEXT: 
vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 @@ -412,7 +422,7 @@ define @insertelt_nxv16i16_idx( %v, i16 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv16i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma @@ -425,8 +435,10 @@ define @insertelt_nxv32i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli a1, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 ret %r @@ -435,7 +447,7 @@ define @insertelt_nxv32i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 @@ -447,7 +459,7 @@ define @insertelt_nxv32i16_idx( %v, i16 signext %elt, i32 signext %idx) { ; CHECK-LABEL: insertelt_nxv32i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma @@ -530,8 +542,10 @@ define @insertelt_nxv4i32_0( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv4i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 ret %r @@ -540,7 +554,7 @@ define @insertelt_nxv4i32_imm( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv4i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -552,7 +566,7 @@ define @insertelt_nxv4i32_idx( %v, i32 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv4i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma @@ -565,8 +579,10 @@ define @insertelt_nxv8i32_0( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv8i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a1, zero, e32, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 ret %r @@ -575,7 +591,7 @@ define @insertelt_nxv8i32_imm( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv8i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 @@ -587,7 +603,7 @@ define @insertelt_nxv8i32_idx( %v, i32 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv8i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: 
vsetvli zero, a0, e32, m4, tu, ma @@ -600,8 +616,10 @@ define @insertelt_nxv16i32_0( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv16i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli a1, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 ret %r @@ -610,7 +628,7 @@ define @insertelt_nxv16i32_imm( %v, i32 %elt) { ; CHECK-LABEL: insertelt_nxv16i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 @@ -622,7 +640,7 @@ define @insertelt_nxv16i32_idx( %v, i32 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv16i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma @@ -789,8 +807,10 @@ ; CHECK-LABEL: insertelt_nxv2i64_0_c10: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 10 +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement %v, i64 10, i32 0 ret %r @@ -800,7 +820,7 @@ ; CHECK-LABEL: insertelt_nxv2i64_imm_c10: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 10 -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -813,7 +833,7 @@ ; CHECK-LABEL: insertelt_nxv2i64_idx_c10: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 10 -; CHECK-NEXT: vsetvli a2, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e64, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a1 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma @@ -827,8 +847,10 @@ ; CHECK-LABEL: insertelt_nxv2i64_0_cn1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -1 +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement %v, i64 -1, i32 0 ret %r @@ -838,7 +860,7 @@ ; CHECK-LABEL: insertelt_nxv2i64_imm_cn1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, -1 -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -851,7 +873,7 @@ ; CHECK-LABEL: insertelt_nxv2i64_idx_cn1: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, -1 -; CHECK-NEXT: vsetvli a2, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e64, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a1 ; CHECK-NEXT: addi a1, a0, 1 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll @@ -145,8 +145,10 @@ define @insertelt_nxv16i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a1, zero, e8, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement 
%v, i8 %elt, i32 0 ret %r @@ -155,7 +157,7 @@ define @insertelt_nxv16i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -167,7 +169,7 @@ define @insertelt_nxv16i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e8, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m2, tu, ma @@ -180,8 +182,10 @@ define @insertelt_nxv32i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 0 ret %r @@ -190,7 +194,7 @@ define @insertelt_nxv32i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 @@ -202,7 +206,7 @@ define @insertelt_nxv32i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv32i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m4, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e8, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, tu, ma @@ -215,8 +219,10 @@ define @insertelt_nxv64i8_0( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli a1, zero, e8, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %r = insertelement %v, i8 %elt, i32 0 ret %r @@ -225,7 +231,7 @@ define @insertelt_nxv64i8_imm( %v, i8 signext %elt) { ; CHECK-LABEL: insertelt_nxv64i8_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e8, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, m8, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 @@ -237,7 +243,7 @@ define @insertelt_nxv64i8_idx( %v, i8 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv64i8_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e8, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma @@ -355,8 +361,10 @@ define @insertelt_nxv8i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a1, zero, e16, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 ret %r @@ -365,7 +373,7 @@ define @insertelt_nxv8i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -377,7 +385,7 @@ define 
@insertelt_nxv8i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma @@ -390,8 +398,10 @@ define @insertelt_nxv16i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a1, zero, e16, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 ret %r @@ -400,7 +410,7 @@ define @insertelt_nxv16i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 @@ -412,7 +422,7 @@ define @insertelt_nxv16i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m4, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma @@ -425,8 +435,10 @@ define @insertelt_nxv32i16_0( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli a1, zero, e16, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %r = insertelement %v, i16 %elt, i32 0 ret %r @@ -435,7 +447,7 @@ define @insertelt_nxv32i16_imm( %v, i16 signext %elt) { ; CHECK-LABEL: insertelt_nxv32i16_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e16, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e16, m8, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 @@ -447,7 +459,7 @@ define @insertelt_nxv32i16_idx( %v, i16 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv32i16_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e16, m8, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e16, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma @@ -530,8 +542,10 @@ define @insertelt_nxv4i32_0( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv4i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a1, zero, e32, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 ret %r @@ -540,7 +554,7 @@ define @insertelt_nxv4i32_imm( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv4i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -552,7 +566,7 @@ define @insertelt_nxv4i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv4i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma @@ -565,8 +579,10 @@ define 
@insertelt_nxv8i32_0( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a1, zero, e32, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 ret %r @@ -575,7 +591,7 @@ define @insertelt_nxv8i32_imm( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv8i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 @@ -587,7 +603,7 @@ define @insertelt_nxv8i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv8i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m4, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma @@ -600,8 +616,10 @@ define @insertelt_nxv16i32_0( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i32_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli a1, zero, e32, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %r = insertelement %v, i32 %elt, i32 0 ret %r @@ -610,7 +628,7 @@ define @insertelt_nxv16i32_imm( %v, i32 signext %elt) { ; CHECK-LABEL: insertelt_nxv16i32_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e32, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e32, m8, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 @@ -622,7 +640,7 @@ define @insertelt_nxv16i32_idx( %v, i32 signext %elt, i32 zeroext %idx) { ; CHECK-LABEL: insertelt_nxv16i32_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e32, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: addi a0, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma @@ -672,8 +690,10 @@ define @insertelt_nxv2i64_0( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv2i64_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 0 ret %r @@ -682,7 +702,7 @@ define @insertelt_nxv2i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv2i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 3 @@ -694,7 +714,7 @@ define @insertelt_nxv2i64_idx( %v, i64 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv2i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e64, m2, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e64, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 ; CHECK-NEXT: slli a1, a1, 32 ; CHECK-NEXT: srli a1, a1, 32 @@ -709,8 +729,10 @@ define @insertelt_nxv4i64_0( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv4i64_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli a1, zero, e64, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 0 ret %r @@ -719,7 +741,7 @@ define @insertelt_nxv4i64_imm( %v, i64 %elt) { ; CHECK-LABEL: 
insertelt_nxv4i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v12, 3 @@ -731,7 +753,7 @@ define @insertelt_nxv4i64_idx( %v, i64 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv4i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e64, m4, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e64, m4, tu, ma ; CHECK-NEXT: vmv.s.x v12, a0 ; CHECK-NEXT: slli a1, a1, 32 ; CHECK-NEXT: srli a1, a1, 32 @@ -746,8 +768,10 @@ define @insertelt_nxv8i64_0( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv8i64_0: ; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli a1, zero, e64, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret %r = insertelement %v, i64 %elt, i32 0 ret %r @@ -756,7 +780,7 @@ define @insertelt_nxv8i64_imm( %v, i64 %elt) { ; CHECK-LABEL: insertelt_nxv8i64_imm: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: vsetivli zero, 4, e64, m8, tu, ma ; CHECK-NEXT: vslideup.vi v8, v16, 3 @@ -768,7 +792,7 @@ define @insertelt_nxv8i64_idx( %v, i64 %elt, i32 %idx) { ; CHECK-LABEL: insertelt_nxv8i64_idx: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetvli a2, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e64, m8, tu, ma ; CHECK-NEXT: vmv.s.x v16, a0 ; CHECK-NEXT: slli a1, a1, 32 ; CHECK-NEXT: srli a1, a1, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f.ll b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmv.s.f.ll @@ -48,8 +48,10 @@ define @intrinsic_vfmv.s.f_f_nxv8f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv8f16( %0, half %1, iXLen %2) @@ -61,8 +63,10 @@ define @intrinsic_vfmv.s.f_f_nxv16f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v12, fa0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv16f16( %0, half %1, iXLen %2) @@ -74,8 +78,10 @@ define @intrinsic_vfmv.s.f_f_nxv32f16( %0, half %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv32f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv32f16( %0, half %1, iXLen %2) @@ -113,8 +119,10 @@ define @intrinsic_vfmv.s.f_f_nxv4f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv4f32( %0, float %1, iXLen %2) @@ -126,8 +134,10 @@ define @intrinsic_vfmv.s.f_f_nxv8f32( %0, 
float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v12, fa0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv8f32( %0, float %1, iXLen %2) @@ -139,8 +149,10 @@ define @intrinsic_vfmv.s.f_f_nxv16f32( %0, float %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv16f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv16f32( %0, float %1, iXLen %2) @@ -165,8 +177,10 @@ define @intrinsic_vfmv.s.f_f_nxv2f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv2f64( %0, double %1, iXLen %2) @@ -178,8 +192,10 @@ define @intrinsic_vfmv.s.f_f_nxv4f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v12, fa0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv4f64( %0, double %1, iXLen %2) @@ -191,8 +207,10 @@ define @intrinsic_vfmv.s.f_f_nxv8f64( %0, double %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_nxv8f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma -; CHECK-NEXT: vfmv.s.f v8, fa0 +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv8f64( %0, double %1, iXLen %2) @@ -235,8 +253,10 @@ define @intrinsic_vfmv.s.f_f_zero_nxv8f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e16, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vmv.s.x v10, zero +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv8f16( %0, half 0.0, iXLen %1) @@ -246,8 +266,10 @@ define @intrinsic_vfmv.s.f_f_zero_nxv16f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vmv.s.x v12, zero +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv16f16( %0, half 0.0, iXLen %1) @@ -257,8 +279,10 @@ define @intrinsic_vfmv.s.f_f_zero_nxv32f16( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv32f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv32f16( %0, half 0.0, iXLen %1) @@ -290,8 +314,10 @@ define @intrinsic_vfmv.s.f_f_zero_nxv4f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv4f32: ; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vmv.s.x v10, zero +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv4f32( %0, float 0.0, iXLen %1) @@ -301,8 +327,10 @@ define @intrinsic_vfmv.s.f_f_zero_nxv8f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vmv.s.x v12, zero +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv8f32( %0, float 0.0, iXLen %1) @@ -312,8 +340,10 @@ define @intrinsic_vfmv.s.f_f_zero_nxv16f32( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv16f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv16f32( %0, float 0.0, iXLen %1) @@ -334,8 +364,10 @@ define @intrinsic_vfmv.s.f_f_zero_nxv2f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vmv.s.x v10, zero +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv2f64( %0, double 0.0, iXLen %1) @@ -345,8 +377,10 @@ define @intrinsic_vfmv.s.f_f_zero_nxv4f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vmv.s.x v12, zero +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv4f64( %0, double 0.0, iXLen %1) @@ -356,8 +390,10 @@ define @intrinsic_vfmv.s.f_f_zero_nxv8f64( %0, iXLen %1) nounwind { ; CHECK-LABEL: intrinsic_vfmv.s.f_f_zero_nxv8f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vmv.s.x v16, zero +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmv.s.f.nxv8f64( %0, double 0.0, iXLen %1) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv32.ll @@ -58,8 +58,10 @@ define @intrinsic_vmv.s.x_x_nxv16i8( %0, i8 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv16i8( %0, i8 %1, i32 %2) @@ -71,8 +73,10 @@ define @intrinsic_vmv.s.x_x_nxv32i8( %0, i8 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv32i8( %0, i8 %1, i32 %2) @@ -84,8 +88,10 @@ define @intrinsic_vmv.s.x_x_nxv64i8( %0, i8 %1, i32 %2) nounwind { ; 
CHECK-LABEL: intrinsic_vmv.s.x_x_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv64i8( %0, i8 %1, i32 %2) @@ -136,8 +142,10 @@ define @intrinsic_vmv.s.x_x_nxv8i16( %0, i16 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv8i16( %0, i16 %1, i32 %2) @@ -149,8 +157,10 @@ define @intrinsic_vmv.s.x_x_nxv16i16( %0, i16 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv16i16( %0, i16 %1, i32 %2) @@ -162,8 +172,10 @@ define @intrinsic_vmv.s.x_x_nxv32i16( %0, i16 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv32i16( %0, i16 %1, i32 %2) @@ -201,8 +213,10 @@ define @intrinsic_vmv.s.x_x_nxv4i32( %0, i32 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv4i32( %0, i32 %1, i32 %2) @@ -214,8 +228,10 @@ define @intrinsic_vmv.s.x_x_nxv8i32( %0, i32 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv8i32( %0, i32 %1, i32 %2) @@ -227,8 +243,10 @@ define @intrinsic_vmv.s.x_x_nxv16i32( %0, i32 %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv16i32( %0, i32 %1, i32 %2) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x-rv64.ll @@ -58,8 +58,10 @@ define @intrinsic_vmv.s.x_x_nxv16i8( %0, i8 %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv16i8( %0, i8 %1, i64 %2) @@ -71,8 +73,10 @@ define @intrinsic_vmv.s.x_x_nxv32i8( %0, i8 %1, i64 %2) nounwind { ; CHECK-LABEL: 
intrinsic_vmv.s.x_x_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv32i8( %0, i8 %1, i64 %2) @@ -84,8 +88,10 @@ define @intrinsic_vmv.s.x_x_nxv64i8( %0, i8 %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv64i8( %0, i8 %1, i64 %2) @@ -136,8 +142,10 @@ define @intrinsic_vmv.s.x_x_nxv8i16( %0, i16 %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv8i16( %0, i16 %1, i64 %2) @@ -149,8 +157,10 @@ define @intrinsic_vmv.s.x_x_nxv16i16( %0, i16 %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv16i16( %0, i16 %1, i64 %2) @@ -162,8 +172,10 @@ define @intrinsic_vmv.s.x_x_nxv32i16( %0, i16 %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv32i16( %0, i16 %1, i64 %2) @@ -201,8 +213,10 @@ define @intrinsic_vmv.s.x_x_nxv4i32( %0, i32 %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv4i32( %0, i32 %1, i64 %2) @@ -214,8 +228,10 @@ define @intrinsic_vmv.s.x_x_nxv8i32( %0, i32 %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv8i32( %0, i32 %1, i64 %2) @@ -227,8 +243,10 @@ define @intrinsic_vmv.s.x_x_nxv16i32( %0, i32 %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv16i32( %0, i32 %1, i64 %2) @@ -253,8 +271,10 @@ define @intrinsic_vmv.s.x_x_nxv2i64( %0, i64 %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v10, a0 +; CHECK-NEXT: vmv1r.v v8, v10 ; 
CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv2i64( %0, i64 %1, i64 %2) @@ -266,8 +286,10 @@ define @intrinsic_vmv.s.x_x_nxv4i64( %0, i64 %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v12, a0 +; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv4i64( %0, i64 %1, i64 %2) @@ -279,8 +301,10 @@ define @intrinsic_vmv.s.x_x_nxv8i64( %0, i64 %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vmv.s.x_x_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vmv1r.v v16, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma -; CHECK-NEXT: vmv.s.x v8, a0 +; CHECK-NEXT: vmv.s.x v16, a0 +; CHECK-NEXT: vmv1r.v v8, v16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmv.s.x.nxv8i64( %0, i64 %1, i64 %2) diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -655,38 +655,42 @@ ; RV32MV-NEXT: vmerge.vim v10, v10, -1, v0 ; RV32MV-NEXT: vand.vv v8, v8, v10 ; RV32MV-NEXT: li a0, 2 +; RV32MV-NEXT: vsetvli zero, zero, e32, m2, tu, ma ; RV32MV-NEXT: vmv.s.x v10, a0 ; RV32MV-NEXT: li a0, 1 -; RV32MV-NEXT: vmv.s.x v12, a0 -; RV32MV-NEXT: vmv.v.i v14, 0 +; RV32MV-NEXT: vmv.s.x v11, a0 +; RV32MV-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32MV-NEXT: vmv.v.i v12, 0 +; RV32MV-NEXT: vmv1r.v v14, v11 ; RV32MV-NEXT: vsetivli zero, 3, e32, m2, tu, ma -; RV32MV-NEXT: vslideup.vi v14, v12, 2 +; RV32MV-NEXT: vslideup.vi v12, v14, 2 ; RV32MV-NEXT: vsetivli zero, 5, e32, m2, tu, ma -; RV32MV-NEXT: vslideup.vi v14, v10, 4 +; RV32MV-NEXT: vslideup.vi v12, v10, 4 ; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32MV-NEXT: vmsne.vv v0, v8, v14 +; RV32MV-NEXT: vmsne.vv v0, v8, v12 ; RV32MV-NEXT: vmv.v.i v8, 0 ; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32MV-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32MV-NEXT: vse32.v v8, (s2) ; RV32MV-NEXT: vslidedown.vi v10, v8, 1 +; RV32MV-NEXT: vse32.v v8, (s2) +; RV32MV-NEXT: vslidedown.vi v12, v8, 2 ; RV32MV-NEXT: vmv.x.s a0, v10 -; RV32MV-NEXT: vslidedown.vi v10, v8, 2 +; RV32MV-NEXT: vmv1r.v v10, v12 ; RV32MV-NEXT: vmv.x.s a1, v10 ; RV32MV-NEXT: slli a2, a1, 1 ; RV32MV-NEXT: sub a2, a2, a0 -; RV32MV-NEXT: sw a2, 4(s2) ; RV32MV-NEXT: vslidedown.vi v10, v8, 4 +; RV32MV-NEXT: sw a2, 4(s2) ; RV32MV-NEXT: vmv.x.s a0, v10 -; RV32MV-NEXT: srli a2, a0, 30 ; RV32MV-NEXT: vslidedown.vi v10, v8, 5 +; RV32MV-NEXT: srli a2, a0, 30 ; RV32MV-NEXT: vmv.x.s a3, v10 ; RV32MV-NEXT: slli a3, a3, 2 ; RV32MV-NEXT: or a2, a3, a2 ; RV32MV-NEXT: andi a2, a2, 7 ; RV32MV-NEXT: sb a2, 12(s2) -; RV32MV-NEXT: srli a1, a1, 31 ; RV32MV-NEXT: vslidedown.vi v8, v8, 3 +; RV32MV-NEXT: srli a1, a1, 31 ; RV32MV-NEXT: vmv.x.s a2, v8 ; RV32MV-NEXT: andi a2, a2, 1 ; RV32MV-NEXT: slli a2, a2, 1 @@ -772,9 +776,10 @@ ; RV64MV-NEXT: slli a3, a2, 31 ; RV64MV-NEXT: srli a3, a3, 61 ; RV64MV-NEXT: sb a3, 12(a0) +; RV64MV-NEXT: vslidedown.vi v10, v8, 1 ; RV64MV-NEXT: vmv.x.s a3, v8 ; RV64MV-NEXT: and a1, a3, a1 -; RV64MV-NEXT: vslidedown.vi v8, v8, 1 +; RV64MV-NEXT: vmv1r.v v8, v10 ; RV64MV-NEXT: vmv.x.s a3, v8 ; RV64MV-NEXT: slli a4, a3, 33 ; RV64MV-NEXT: or a1, a1, a4