Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3678,20 +3678,10 @@
                        DAG.getConstant(0, DL, XLenVT));
   }
 
-  if (VT.isFloatingPoint()) {
-    // TODO: Use vmv.v.i for appropriate constants
-    // Use M1 or smaller to avoid over constraining register allocation
-    const MVT M1VT = getLMUL1VT(VT);
-    auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
-    SDValue Result = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, InnerVT,
-                                 DAG.getUNDEF(InnerVT), Scalar, VL);
-    if (VT != InnerVT)
-      Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
-                           DAG.getUNDEF(VT),
-                           Result, DAG.getConstant(0, DL, XLenVT));
-    return Result;
-  }
+  if (VT.isFloatingPoint())
+    return DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, VT,
+                       DAG.getUNDEF(VT), Scalar, VL);
 
   // Avoid the tricky legalization cases by falling back to using the
   // splat code which already handles it gracefully.
@@ -3707,24 +3697,8 @@
   unsigned ExtOpc =
       isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
   Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
-  // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
-  // higher would involve overly constraining the register allocator for
-  // no purpose.
-  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar)) {
-    if (!isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
-        VT.bitsLE(getLMUL1VT(VT)))
-      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
-  }
-  // Use M1 or smaller to avoid over constraining register allocation
-  const MVT M1VT = getLMUL1VT(VT);
-  auto InnerVT = VT.bitsLE(M1VT) ? VT : M1VT;
-  SDValue Result = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, InnerVT,
-                               DAG.getUNDEF(InnerVT), Scalar, VL);
-  if (VT != InnerVT)
-    Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
-                         DAG.getUNDEF(VT),
-                         Result, DAG.getConstant(0, DL, XLenVT));
-  return Result;
+  return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, VT,
+                     DAG.getUNDEF(VT), Scalar, VL);
 }
 
 // Is this a shuffle extracts either the even or odd elements of a vector?
@@ -13333,6 +13307,8 @@
 SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                                DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
+  const MVT XLenVT = Subtarget.getXLenVT();
+  SDLoc DL(N);
 
   // Helper to call SimplifyDemandedBits on an operand of N where only some low
   // bits are demanded. N will be added to the Worklist if it was not deleted.
@@ -13364,8 +13340,6 @@
     return DCI.CombineTo(N, Lo, Hi);
   }
 
-  SDLoc DL(N);
-
   // It's cheaper to materialise two 32-bit integers than to load a double
   // from the constant pool and transfer it to integer registers through the
   // stack.
@@ -13699,7 +13673,6 @@
     }
 
     EVT IndexVT = Index.getValueType();
-    MVT XLenVT = Subtarget.getXLenVT();
     // RISC-V indexed loads only support the "unsigned unscaled" addressing
     // mode, so anything else must be manually legalized.
     bool NeedsIdxLegalization =
@@ -13949,6 +13922,32 @@
       return Src.getOperand(0);
       // TODO: Use insert_subvector/extract_subvector to change widen/narrow?
     }
+    [[fallthrough]];
+  }
+  case RISCVISD::VMV_S_X_VL: {
+    const MVT VT = N->getSimpleValueType(0);
+    SDValue Passthru = N->getOperand(0);
+    SDValue Scalar = N->getOperand(1);
+    SDValue VL = N->getOperand(2);
+
+    // Use M1 or smaller to avoid over constraining register allocation
+    const MVT M1VT = getLMUL1VT(VT);
+    if (M1VT.bitsLT(VT) && Passthru.isUndef()) {
+      SDValue Result =
+          DAG.getNode(N->getOpcode(), DL, M1VT, Passthru, Scalar, VL);
+      Result = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT),
+                           Result, DAG.getConstant(0, DL, XLenVT));
+      return Result;
+    }
+
+    // We use a vmv.v.i if possible. We limit this to LMUL1. LMUL2 or
+    // higher would involve overly constraining the register allocator for
+    // no purpose.
+    if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Scalar);
+        Const && !isNullConstant(Scalar) && isInt<5>(Const->getSExtValue()) &&
+        VT.bitsLE(getLMUL1VT(VT)) && Passthru.isUndef())
+      return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Passthru, Scalar, VL);
+
     break;
   }
   case ISD::INTRINSIC_VOID:
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -247,9 +247,8 @@
 define <8 x i64> @insertelt_v8i64(<8 x i64> %a, i32 %idx) {
 ; RV32-LABEL: insertelt_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, -1
 ; RV32-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v12, a1
+; RV32-NEXT:    vmv.v.i v12, -1
 ; RV32-NEXT:    addi a1, a0, 1
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
 ; RV32-NEXT:    vslideup.vx v8, v12, a0
@@ -257,9 +256,8 @@
 ;
 ; RV64-LABEL: insertelt_v8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, -1
 ; RV64-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v12, a1
+; RV64-NEXT:    vmv.v.i v12, -1
 ; RV64-NEXT:    slli a0, a0, 32
 ; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    addi a1, a0, 1
@@ -327,9 +325,8 @@
 define <8 x i64> @insertelt_c6_v8i64(<8 x i64> %a, i32 %idx) {
 ; RV32-LABEL: insertelt_c6_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 6
 ; RV32-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
-; RV32-NEXT:    vmv.s.x v12, a1
+; RV32-NEXT:    vmv.v.i v12, 6
 ; RV32-NEXT:    addi a1, a0, 1
 ; RV32-NEXT:    vsetvli zero, a1, e64, m4, tu, ma
 ; RV32-NEXT:    vslideup.vx v8, v12, a0
@@ -337,9 +334,8 @@
 ;
 ; RV64-LABEL: insertelt_c6_v8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 6
 ; RV64-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
-; RV64-NEXT:    vmv.s.x v12, a1
+; RV64-NEXT:    vmv.v.i v12, 6
 ; RV64-NEXT:    slli a0, a0, 32
 ; RV64-NEXT:    srli a0, a0, 32
 ; RV64-NEXT:    addi a1, a0, 1
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -696,18 +696,18 @@
 ; CHECK-NEXT:    vsrl.vi v8, v8, 1
 ; CHECK-NEXT:    vadd.vi v8, v8, 3
 ; CHECK-NEXT:    vse16.v v8, (a0)
+; CHECK-NEXT:    vmv.v.i v9, 3
 ; CHECK-NEXT:    vse16.v v8, (a1)
 ; CHECK-NEXT:    vse16.v v8, (a2)
 ; CHECK-NEXT:    vse16.v v8, (a3)
 ; CHECK-NEXT:    vse16.v v8, (a4)
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vmv.v.i v9, 4
+; CHECK-NEXT:    vmv.v.i v8, 4
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 1
+; CHECK-NEXT:    vslideup.vi v8, v9, 1
 ; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vse16.v v9, (a5)
+; CHECK-NEXT:    vse16.v v8, (a5)
 ; CHECK-NEXT:    li a0, 4
-; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v9, a0
 ; CHECK-NEXT:    vse16.v v8, (a6)
 ; CHECK-NEXT:    ret
   store <4 x i16> , ptr %z0
Index: llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -945,15 +945,15 @@
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
-; RV64-NEXT:    li a1, 1
 ; RV64-NEXT:    vmv.v.i v12, 7
-; RV64-NEXT:    csrr a2, vlenb
-; RV64-NEXT:    slli a3, a2, 4
-; RV64-NEXT:    add a2, a3, a2
-; RV64-NEXT:    add a2, sp, a2
-; RV64-NEXT:    addi a2, a2, 16
-; RV64-NEXT:    vs4r.v v12, (a2) # Unknown-size Folded Spill
-; RV64-NEXT:    vmv.s.x v16, a1
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a2, a1, 4
+; RV64-NEXT:    add a1, a2, a1
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs4r.v v12, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetivli zero, 8, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.i v16, 1
 ; RV64-NEXT:    vsetivli zero, 6, e64, m4, tu, ma
 ; RV64-NEXT:    csrr a1, vlenb
 ; RV64-NEXT:    add a1, sp, a1
Index: llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
+++ llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll
@@ -781,9 +781,9 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_imm_c10(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: insertelt_nxv2i64_imm_c10:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, 10
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, 10
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
-; CHECK-NEXT:    vmv.s.x v10, a0
 ; CHECK-NEXT:    vslideup.vi v8, v10, 3
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 2 x i64> %v, i64 10, i32 3
@@ -793,9 +793,8 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_idx_c10(<vscale x 2 x i64> %v, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv2i64_idx_c10:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, 10
-; CHECK-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v10, a1
+; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, 10
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v10, a0
@@ -818,9 +817,9 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_imm_cn1(<vscale x 2 x i64> %v) {
 ; CHECK-LABEL: insertelt_nxv2i64_imm_cn1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a0, -1
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, -1
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
-; CHECK-NEXT:    vmv.s.x v10, a0
 ; CHECK-NEXT:    vslideup.vi v8, v10, 3
 ; CHECK-NEXT:    ret
   %r = insertelement <vscale x 2 x i64> %v, i64 -1, i32 3
@@ -830,9 +829,8 @@
 define <vscale x 2 x i64> @insertelt_nxv2i64_idx_cn1(<vscale x 2 x i64> %v, i32 %idx) {
 ; CHECK-LABEL: insertelt_nxv2i64_idx_cn1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    li a1, -1
-; CHECK-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v10, a1
+; CHECK-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, -1
 ; CHECK-NEXT:    addi a1, a0, 1
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m2, tu, ma
 ; CHECK-NEXT:    vslideup.vx v8, v10, a0
Index: llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
===================================================================
--- llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
+++ llvm/test/CodeGen/RISCV/rvv/splat-vector-split-i64-vl-sdnode.ll
@@ -8,9 +8,9 @@
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    addi sp, sp, -32
 ; CHECK-NEXT:    .cfi_def_cfa_offset 32
-; CHECK-NEXT:    li a0, 3
-; CHECK-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-NEXT:    vmv.s.x v10, a0
+; CHECK-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v10, 3
+; CHECK-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
 ; CHECK-NEXT:    vsetivli zero, 4, e64, m2, tu, ma
 ; CHECK-NEXT:    vslideup.vi v8, v10, 3
Index: llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
===================================================================
--- llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -668,10 +668,10 @@
 ; RV32MV-NEXT:    vmv.v.i v10, 1
 ; RV32MV-NEXT:    vmerge.vim v10, v10, -1, v0
 ; RV32MV-NEXT:    vand.vv v8, v8, v10
-; RV32MV-NEXT:    li a0, 2
-; RV32MV-NEXT:    vmv.s.x v10, a0
-; RV32MV-NEXT:    li a0, 1
-; RV32MV-NEXT:    vmv.s.x v12, a0
+; RV32MV-NEXT:    vsetivli zero, 8, e32, m1, ta, ma
+; RV32MV-NEXT:    vmv.v.i v10, 2
+; RV32MV-NEXT:    vmv.v.i v12, 1
+; RV32MV-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32MV-NEXT:    vmv.v.i v14, 0
 ; RV32MV-NEXT:    vsetivli zero, 3, e32, m2, tu, ma
 ; RV32MV-NEXT:    vslideup.vi v14, v12, 2