diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -571,6 +571,8 @@
         setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
         setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
       }
+
+      setOperationAction(ISD::VP_FPTOSI, VT, Custom);
     }
 
     for (MVT VT : IntVecVTs) {
@@ -6171,10 +6173,10 @@
         // Widen before converting.
         MVT IntVT = MVT::getVectorVT(MVT::getIntegerVT(DstEltSize / 2),
                                      DstVT.getVectorElementCount());
-        Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, {Src, Mask, VL});
+        Src = DAG.getNode(RISCVISDExtOpc, DL, IntVT, Src, Mask, VL);
       }
 
-      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, {Src, Mask, VL});
+      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
     } else {
       assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
              "Wrong input/output vector types");
@@ -6184,11 +6186,11 @@
         assert(SrcVT.getVectorElementType() == MVT::f16 && "Unexpected type!");
         MVT InterimFVT =
             MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
-        Src = DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT,
-                          {Src, Mask, VL});
+        Src =
+            DAG.getNode(RISCVISD::FP_EXTEND_VL, DL, InterimFVT, Src, Mask, VL);
       }
 
-      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, {Src, Mask, VL});
+      Result = DAG.getNode(RISCVISDOpc, DL, DstVT, Src, Mask, VL);
     }
   } else { // Narrowing + Conversion
     if (SrcVT.isInteger()) {
@@ -6203,11 +6205,11 @@
         InterimFVT = MVT::getVectorVT(MVT::f32, DstVT.getVectorElementCount());
       }
 
-      Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, {Src, Mask, VL});
+      Result = DAG.getNode(RISCVISDOpc, DL, InterimFVT, Src, Mask, VL);
 
       if (InterimFVT != DstVT) {
         Src = Result;
-        Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, {Src, Mask, VL});
+        Result = DAG.getNode(RISCVISD::FP_ROUND_VL, DL, DstVT, Src, Mask, VL);
       }
     } else {
       assert(SrcVT.isFloatingPoint() && DstVT.isInteger() &&
@@ -6215,21 +6217,41 @@
       // First do a narrowing conversion to an integer half the size, then
       // truncate if needed.
 
-      // TODO: Handle mask vectors
-      assert(DstVT.getVectorElementType() != MVT::i1 &&
-             "Don't know how to handle masks yet!");
-      MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
-                                        DstVT.getVectorElementCount());
+      if (DstEltSize == 1) {
+        // First convert to a narrower integer, then convert to mask via
+        // and+setcc.
+        assert(SrcEltSize >= 16 && "Unexpected FP type!");
+        MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
+                                          DstVT.getVectorElementCount());
+        Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
+
+        // Mask bit 1 and check if it is non-zero.
+        // TODO: Is the mask necessary? Out of range conversions are undefined.
+        MVT XLenVT = Subtarget.getXLenVT();
+        SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
+        SDValue SplatOne = DAG.getConstant(1, DL, XLenVT);
+        SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
+                                DAG.getUNDEF(InterimIVT), SplatZero);
+        SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, InterimIVT,
+                               DAG.getUNDEF(InterimIVT), SplatOne);
+        Result = DAG.getNode(RISCVISD::AND_VL, DL, InterimIVT,
+                             {Result, SplatOne, Mask, VL});
+        Result = DAG.getNode(RISCVISD::SETCC_VL, DL, DstVT, Result, SplatZero,
+                             DAG.getCondCode(ISD::SETNE), Mask, VL);
+      } else {
+        MVT InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
+                                          DstVT.getVectorElementCount());
 
-      Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, {Src, Mask, VL});
+        Result = DAG.getNode(RISCVISDOpc, DL, InterimIVT, Src, Mask, VL);
 
-      while (InterimIVT != DstVT) {
-        SrcEltSize /= 2;
-        Src = Result;
-        InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
-                                      DstVT.getVectorElementCount());
-        Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
-                             {Src, Mask, VL});
+        while (InterimIVT != DstVT) {
+          SrcEltSize /= 2;
+          Src = Result;
+          InterimIVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize / 2),
+                                        DstVT.getVectorElementCount());
+          Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, InterimIVT,
+                               Src, Mask, VL);
+        }
       }
     }
   }
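; Illustrative per-lane model of the new DstEltSize == 1 path above (a sketch,
; not part of the patch; the function name is made up). Per LangRef, fptosi of
; a value that does not fit the destination type is undefined, so for an i1
; destination only inputs that truncate to 0 or -1 are meaningful; narrowing
; to an integer, keeping bit 0, and comparing against zero is therefore
; sufficient, and mirrors the AND_VL + SETCC_VL nodes built above.
define i1 @fptosi_f16_to_i1_model(half %x) {
  %n = fptosi half %x to i8 ; narrowing conversion (vfncvt.rtz.x.f.w)
  %a = and i8 %n, 1         ; mask bit 1 (vand.vi)
  %c = icmp ne i8 %a, 0     ; set the mask bit where non-zero (vmsne.vi)
  ret i1 %c
}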
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -458,6 +458,15 @@
             (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
                 vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
+  def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
+                                      vti.RegClass:$rs2, cc,
+                                      (vti.Mask V0),
+                                      VLOpFrag)),
+            (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
+                 (vti.Mask (IMPLICIT_DEF)),
+                 vti.RegClass:$rs1,
+                 vti.RegClass:$rs2,
+                 (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
 }
 
 // Inherits from VPatIntegerSetCCVL_VV and adds a pattern with operands swapped.
 multiclass VPatIntegerSetCCVL_VV_Swappable<VTypeInfo vti, string instruction_name,
@@ -471,47 +480,93 @@
             (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX)
                 vti.RegClass:$rs1, vti.RegClass:$rs2, GPR:$vl, vti.Log2SEW)>;
+  def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs2),
+                                      vti.RegClass:$rs1, invcc,
+                                      (vti.Mask V0),
+                                      VLOpFrag)),
+            (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX#"_MASK")
+                 (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+                 vti.RegClass:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+                 TAIL_AGNOSTIC)>;
 }
 
 multiclass VPatIntegerSetCCVL_VX_Swappable<VTypeInfo vti, string instruction_name,
                                            CondCode cc, CondCode invcc> {
   defvar instruction = !cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX);
+  defvar instruction_masked = !cast<Instruction>(instruction_name#"_VX_"#vti.LMul.MX#"_MASK");
   def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
                                       (SplatPat (XLenVT GPR:$rs2)), cc,
                                       (vti.Mask true_mask),
                                       VLOpFrag)),
             (instruction vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
+  def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
+                                      (SplatPat (XLenVT GPR:$rs2)), cc,
+                                      (vti.Mask V0),
+                                      VLOpFrag)),
+            (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+                                GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+                                TAIL_AGNOSTIC)>;
   def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat (XLenVT GPR:$rs2)),
                                       (vti.Vector vti.RegClass:$rs1), invcc,
                                       (vti.Mask true_mask),
                                       VLOpFrag)),
             (instruction vti.RegClass:$rs1, GPR:$rs2, GPR:$vl, vti.Log2SEW)>;
+  def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat (XLenVT GPR:$rs2)),
+                                      (vti.Vector vti.RegClass:$rs1), invcc,
+                                      (vti.Mask V0),
+                                      VLOpFrag)),
+            (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+                                GPR:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+                                TAIL_AGNOSTIC)>;
 }
 
 multiclass VPatIntegerSetCCVL_VI_Swappable<VTypeInfo vti, string instruction_name,
                                            CondCode cc, CondCode invcc> {
   defvar instruction = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX);
+  defvar instruction_masked = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK");
   def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
                                       (SplatPat_simm5 simm5:$rs2), cc,
                                       (vti.Mask true_mask),
                                       VLOpFrag)),
             (instruction vti.RegClass:$rs1, XLenVT:$rs2, GPR:$vl, vti.Log2SEW)>;
+  def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
+                                      (SplatPat_simm5 simm5:$rs2), cc,
+                                      (vti.Mask V0),
+                                      VLOpFrag)),
+            (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+                                XLenVT:$rs2, (vti.Mask V0), GPR:$vl,
+                                vti.Log2SEW, TAIL_AGNOSTIC)>;
   def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat_simm5 simm5:$rs2),
                                       (vti.Vector vti.RegClass:$rs1), invcc,
                                       (vti.Mask true_mask),
                                       VLOpFrag)),
             (instruction vti.RegClass:$rs1, simm5:$rs2, GPR:$vl, vti.Log2SEW)>;
+  def : Pat<(vti.Mask (riscv_setcc_vl (SplatPat_simm5 simm5:$rs2),
+                                      (vti.Vector vti.RegClass:$rs1), invcc,
+                                      (vti.Mask V0),
+                                      VLOpFrag)),
+            (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+                                simm5:$rs2, (vti.Mask V0), GPR:$vl, vti.Log2SEW,
+                                TAIL_AGNOSTIC)>;
 }
 
 multiclass VPatIntegerSetCCVL_VIPlus1<VTypeInfo vti, string instruction_name,
                                       CondCode cc, ComplexPattern splatpat_kind> {
   defvar instruction = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX);
+  defvar instruction_masked = !cast<Instruction>(instruction_name#"_VI_"#vti.LMul.MX#"_MASK");
   def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
                                       (splatpat_kind simm5:$rs2), cc,
                                       (vti.Mask true_mask),
                                       VLOpFrag)),
             (instruction vti.RegClass:$rs1, (DecImm simm5:$rs2),
                          GPR:$vl, vti.Log2SEW)>;
+  def : Pat<(vti.Mask (riscv_setcc_vl (vti.Vector vti.RegClass:$rs1),
+                                      (splatpat_kind simm5:$rs2), cc,
+                                      (vti.Mask V0),
+                                      VLOpFrag)),
+            (instruction_masked (vti.Mask (IMPLICIT_DEF)), vti.RegClass:$rs1,
+                                (DecImm simm5:$rs2), (vti.Mask V0), GPR:$vl,
+                                vti.Log2SEW, TAIL_AGNOSTIC)>;
 }
 
 multiclass VPatFPSetCCVL_VV_VF_FV<CondCode cc,
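; Note on the TableGen changes (annotation, not part of the patch): each
; existing true_mask pattern gains a masked twin that selects the "_MASK"
; pseudo with an IMPLICIT_DEF passthru and a tail-agnostic policy. This is
; what lets the final compare of the i1 lowering stay under the VP mask in
; the tests below, e.g.:
;   vmsne.vi v0, v8, 0, v0.t   ; masked SETCC_VL -> PseudoVMSNE_VI_<MX>_MASK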
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptosi-vp-mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptosi-vp-mask.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+experimental-zvfh -riscv-v-vector-bits-min=128 < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+experimental-zvfh -riscv-v-vector-bits-min=128 < %s | FileCheck %s
+
+declare <4 x i1> @llvm.vp.fptosi.v4i1.v4f16(<4 x half>, <4 x i1>, i32)
+
+define <4 x i1> @vfptosi_v4i1_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_v4i1_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
+; CHECK-NEXT:    vand.vi v8, v9, 1, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i1> %v
+}
+
+define <4 x i1> @vfptosi_v4i1_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_v4i1_v4f16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f16(<4 x half> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+  ret <4 x i1> %v
+}
+
+declare <4 x i1> @llvm.vp.fptosi.v4i1.v4f32(<4 x float>, <4 x i1>, i32)
+
+define <4 x i1> @vfptosi_v4i1_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_v4i1_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
+; CHECK-NEXT:    vand.vi v8, v9, 1, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i1> %v
+}
+
+define <4 x i1> @vfptosi_v4i1_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_v4i1_v4f32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f32(<4 x float> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+  ret <4 x i1> %v
+}
+
+declare <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double>, <4 x i1>, i32)
+
+define <4 x i1> @vfptosi_v4i1_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_v4i1_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8, v0.t
+; CHECK-NEXT:    vand.vi v8, v10, 1, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl)
+  ret <4 x i1> %v
+}
+
+define <4 x i1> @vfptosi_v4i1_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_v4i1_v4f64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <4 x i1> @llvm.vp.fptosi.v4i1.v4f64(<4 x double> %va, <4 x i1> shufflevector (<4 x i1> insertelement (<4 x i1> undef, i1 true, i32 0), <4 x i1> undef, <4 x i32> zeroinitializer), i32 %evl)
+  ret <4 x i1> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+experimental-zvfh < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+experimental-zvfh < %s | FileCheck %s
+
+declare <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_nxv2i1_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
+; CHECK-NEXT:    vand.vi v8, v9, 1, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
+
+define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f16_unmasked(<vscale x 2 x half> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_nxv2i1_nxv2f16_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e8, mf4, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f16(<vscale x 2 x half> %va, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_nxv2i1_nxv2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8, v0.t
+; CHECK-NEXT:    vand.vi v8, v9, 1, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
+
+define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f32_unmasked(<vscale x 2 x float> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_nxv2i1_nxv2f32_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, mf2, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v9, v8
+; CHECK-NEXT:    vand.vi v8, v9, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f32(<vscale x 2 x float> %va, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
+
+declare <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, i32)
+
+define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_nxv2i1_nxv2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8, v0.t
+; CHECK-NEXT:    vand.vi v8, v10, 1, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v8, 0, v0.t
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> %m, i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
+
+define <vscale x 2 x i1> @vfptosi_nxv2i1_nxv2f64_unmasked(<vscale x 2 x double> %va, i32 zeroext %evl) {
+; CHECK-LABEL: vfptosi_nxv2i1_nxv2f64_unmasked:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e32, m1, ta, mu
+; CHECK-NEXT:    vfncvt.rtz.x.f.w v10, v8
+; CHECK-NEXT:    vand.vi v8, v10, 1
+; CHECK-NEXT:    vmsne.vi v0, v8, 0
+; CHECK-NEXT:    ret
+  %v = call <vscale x 2 x i1> @llvm.vp.fptosi.nxv2i1.nxv2f64(<vscale x 2 x double> %va, <vscale x 2 x i1> shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> undef, i1 true, i32 0), <vscale x 2 x i1> undef, <vscale x 2 x i32> zeroinitializer), i32 %evl)
+  ret <vscale x 2 x i1> %v
+}
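; Usage note (assumed workflow, not spelled out in the patch): the CHECK lines
; in both test files are autogenerated, and can be regenerated after changing
; the lowering with utils/update_llc_test_checks.py, e.g.:
;   llvm/utils/update_llc_test_checks.py \
;     llvm/test/CodeGen/RISCV/rvv/vfptosi-vp-mask.ll \
;     llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfptosi-vp-mask.ll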