diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -164,6 +164,12 @@
   // value. The fourth and fifth operands are the mask and VL operands.
   VSLIDE1UP_VL,
   VSLIDE1DOWN_VL,
+  // Matches the semantics of vfslide1up/vfslide1down. The first operand is
+  // the passthru operand, the second is the source vector, and the third is
+  // a scalar value whose type matches the element type of the vectors. The
+  // fourth and fifth operands are the mask and VL operands.
+  VFSLIDE1UP_VL,
+  VFSLIDE1DOWN_VL,
   // Matches the semantics of the vid.v instruction, with a mask and VL
   // operand.
   VID_VL,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3218,15 +3218,13 @@
   // For constant vectors, use generic constant pool lowering. Otherwise,
   // we'd have to materialize constants in GPRs just to move them into the
   // vector.
-  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()))
+  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
+      ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
     return SDValue();
 
-  // We can use a series of vslide1down instructions to move values in GPRs
-  // into the appropriate place in the result vector. We use slide1down
-  // to avoid the register group overlap constraint of vslide1up.
-  if (VT.isFloatingPoint())
-    // TODO: Use vfslide1down.
-    return SDValue();
+  assert((!VT.isFloatingPoint() ||
+          VT.getVectorElementType().getSizeInBits() <= Subtarget.getFLen()) &&
+         "Illegal type which will result in reserved encoding");
 
   const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
 
@@ -3243,8 +3241,10 @@
                           Vec, Offset, Mask, VL, Policy);
         UndefCount = 0;
       }
-      Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, ContainerVT,
-                        DAG.getUNDEF(ContainerVT), Vec, V, Mask, VL);
+      auto OpCode =
+        VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+      Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+                        V, Mask, VL);
     }
     if (UndefCount) {
       const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
@@ -15161,6 +15161,8 @@
   NODE_NAME_CASE(VSLIDE1UP_VL)
   NODE_NAME_CASE(VSLIDEDOWN_VL)
   NODE_NAME_CASE(VSLIDE1DOWN_VL)
+  NODE_NAME_CASE(VFSLIDE1UP_VL)
+  NODE_NAME_CASE(VFSLIDE1DOWN_VL)
   NODE_NAME_CASE(VID_VL)
   NODE_NAME_CASE(VFNCVT_ROD_VL)
   NODE_NAME_CASE(VECREDUCE_ADD_VL)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -2456,11 +2456,18 @@
   SDTCisVT<3, XLenVT>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>,
   SDTCisVT<5, XLenVT>
 ]>;
+def SDTRVVFSlide1 : SDTypeProfile<1, 5, [
+  SDTCisVec<0>, SDTCisSameAs<1, 0>, SDTCisSameAs<2, 0>, SDTCisFP<0>,
+  SDTCisEltOfVec<3, 0>, SDTCVecEltisVT<4, i1>, SDTCisSameNumEltsAs<0, 4>,
+  SDTCisVT<5, XLenVT>
+]>;
 
 def riscv_slideup_vl    : SDNode<"RISCVISD::VSLIDEUP_VL", SDTRVVSlide, []>;
 def riscv_slide1up_vl   : SDNode<"RISCVISD::VSLIDE1UP_VL", SDTRVVSlide1, []>;
 def riscv_slidedown_vl  : SDNode<"RISCVISD::VSLIDEDOWN_VL", SDTRVVSlide, []>;
 def riscv_slide1down_vl : SDNode<"RISCVISD::VSLIDE1DOWN_VL", SDTRVVSlide1, []>;
+def riscv_fslide1up_vl   : SDNode<"RISCVISD::VFSLIDE1UP_VL", SDTRVVFSlide1, []>;
+def riscv_fslide1down_vl : SDNode<"RISCVISD::VFSLIDE1DOWN_VL", SDTRVVFSlide1, []>;
 
 foreach vti = AllIntegerVectors in {
   let Predicates = GetVTypePredicates<vti>.Predicates in {
@@ -2495,6 +2502,35 @@
   }
 }
 
+foreach vti = AllFloatVectors in {
+  let Predicates = GetVTypePredicates<vti>.Predicates in {
+    def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector undef),
+                                              (vti.Vector vti.RegClass:$rs1),
+                                              vti.Scalar:$rs2, (vti.Mask true_mask),
+                                              VLOpFrag)),
+              (!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+                  vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
+    def : Pat<(vti.Vector (riscv_fslide1up_vl (vti.Vector vti.RegClass:$rd),
+                                              (vti.Vector vti.RegClass:$rs1),
+                                              vti.Scalar:$rs2, (vti.Mask true_mask),
+                                              VLOpFrag)),
+              (!cast<Instruction>("PseudoVFSLIDE1UP_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_TU")
+                  vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
+    def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector undef),
+                                                (vti.Vector vti.RegClass:$rs1),
+                                                vti.Scalar:$rs2, (vti.Mask true_mask),
+                                                VLOpFrag)),
+              (!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX)
+                  vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
+    def : Pat<(vti.Vector (riscv_fslide1down_vl (vti.Vector vti.RegClass:$rd),
+                                                (vti.Vector vti.RegClass:$rs1),
+                                                vti.Scalar:$rs2, (vti.Mask true_mask),
+                                                VLOpFrag)),
+              (!cast<Instruction>("PseudoVFSLIDE1DOWN_V"#vti.ScalarSuffix#"_"#vti.LMul.MX#"_TU")
+                  vti.RegClass:$rd, vti.RegClass:$rs1, vti.ScalarRegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
+  }
+}
+
 foreach vti = AllVectors in {
   let Predicates = GetVTypePredicates<vti>.Predicates in {
     def : Pat<(vti.Vector (riscv_slideup_vl (vti.Vector vti.RegClass:$rs3),
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -291,14 +291,9 @@
 define <2 x half> @buildvec_v2f16(half %a, half %b) {
 ; CHECK-LABEL: buildvec_v2f16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    fsh fa1, 14(sp)
-; CHECK-NEXT:    fsh fa0, 12(sp)
-; CHECK-NEXT:    addi a0, sp, 12
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
 ; CHECK-NEXT:    ret
   %v1 = insertelement <2 x half> poison, half %a, i64 0
   %v2 = insertelement <2 x half> %v1, half %b, i64 1
@@ -308,14 +303,9 @@
 define <2 x float> @buildvec_v2f32(float %a, float %b) {
 ; CHECK-LABEL: buildvec_v2f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    fsw fa1, 12(sp)
-; CHECK-NEXT:    fsw fa0, 8(sp)
-; CHECK-NEXT:    addi a0, sp, 8
 ; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
 ; CHECK-NEXT:    ret
   %v1 = insertelement <2 x float> poison, float %a, i64 0
   %v2 = insertelement <2 x float> %v1, float %b, i64 1
@@ -325,14 +315,9 @@
 define <2 x double> @buildvec_v2f64(double %a, double %b) {
 ; CHECK-LABEL: buildvec_v2f64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    fsd fa1, 8(sp)
-; CHECK-NEXT:    fsd fa0, 0(sp)
-; CHECK-NEXT:    mv a0, sp
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
 ; CHECK-NEXT:    ret
   %v1 = insertelement <2 x double> poison, double %a, i64 0
   %v2 = insertelement <2 x double> %v1, double %b, i64 1
@@ -342,14 +327,9 @@
 define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
 ; CHECK-LABEL: buildvec_v2f64_b:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    fsd fa1, 8(sp)
-; CHECK-NEXT:    fsd fa0, 0(sp)
-; CHECK-NEXT:    mv a0, sp
 ; CHECK-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT:    vle64.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
 ; CHECK-NEXT:    ret
   %v1 = insertelement <2 x double> poison, double %b, i64 1
   %v2 = insertelement <2 x double> %v1, double %a, i64 0
@@ -359,16 +339,11 @@
 define <4 x float> @buildvec_v4f32(float %a, float %b, float %c, float %d) {
 ; CHECK-LABEL: buildvec_v4f32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    fsw fa3, 12(sp)
-; CHECK-NEXT:    fsw fa2, 8(sp)
-; CHECK-NEXT:    fsw fa1, 4(sp)
-; CHECK-NEXT:    fsw fa0, 0(sp)
-; CHECK-NEXT:    mv a0, sp
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    addi sp, sp, 16
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
 ; CHECK-NEXT:    ret
   %v1 = insertelement <4 x float> poison, float %a, i64 0
   %v2 = insertelement <4 x float> %v1, float %b, i64 1
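
Note for reviewers (illustration only, not part of the patch): vslide1down.vx/vfslide1down.vf shift every element of the source one slot toward element 0 and insert the scalar operand at the highest element, so chaining one slide per BUILD_VECTOR operand in source order leaves the first operand in element 0, which is the ordering the updated CHECK lines above reflect. A minimal standalone C++ model of that behavior follows; the helper name slide1down is made up for this sketch and does not correspond to any LLVM API.

// Standalone model of a vslide1down/vfslide1down chain (illustration only).
#include <array>
#include <cstddef>
#include <cstdio>
#include <initializer_list>

template <typename T, std::size_t N>
std::array<T, N> slide1down(std::array<T, N> Vec, T Scalar) {
  for (std::size_t I = 0; I + 1 < N; ++I)
    Vec[I] = Vec[I + 1]; // every element moves one slot toward element 0
  Vec[N - 1] = Scalar;   // the scalar operand lands in the highest element
  return Vec;
}

int main() {
  std::array<float, 4> V{}; // stands in for the initially undef result vector
  for (float S : {1.0f, 2.0f, 3.0f, 4.0f})
    V = slide1down(V, S);  // one slide per BUILD_VECTOR operand, in order
  for (float E : V)
    std::printf("%g ", E); // prints: 1 2 3 4
  std::printf("\n");
  return 0;
}

Fed the operands in order, the model ends with {1, 2, 3, 4}, mirroring how the fa0..fa3 chain in buildvec_v4f32 above places the first argument in element 0.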