@@ -8781,16 +8781,50 @@ multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move
8781
8781
def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
8782
8782
(OpNode (extractelt _.VT:$src2, (iPTR 0))),
8783
8783
(extractelt _.VT:$dst, (iPTR 0))))),
8784
- (!cast<Instruction>("V"#OpcPrefix#r_Intk )
8784
+ (!cast<Instruction>("V"#OpcPrefix#Zr_Intk )
8785
8785
_.VT:$dst, OutMask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
8786
8786
8787
8787
def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects Mask,
8788
8788
(OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
8789
- (!cast<Instruction>("V"#OpcPrefix#r_Intkz )
8789
+ (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz )
8790
8790
OutMask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
8791
8791
}
8792
8792
}
8793
8793
8794
// Instantiations of avx512_masked_scalar_imm for the ffloor and fceil nodes.
// "RNDSCALESS" is paired with X86Movss, v4f32x_info and fp32imm0;
// "RNDSCALESD" is paired with X86Movsd, v2f64x_info and fp64imm0.
// Every node/opcode pair is instantiated twice: once with the mask taken from
// GR32:$mask and once from GR8:$mask, both wrapped as
// (v1i1 (scalar_to_vector ...)). In all cases the output mask operand is
// (COPY_TO_REGCLASS $mask, VK1WM) and the predicate is HasAVX512.
// The immediate passed is 0x01 for ffloor and 0x02 for fceil.
// NOTE(review): the packed ffloor/fceil patterns visible elsewhere in this
// file use immediates 0x9 and 0xA, whereas these scalar masked forms use
// 0x01 and 0x02 -- confirm the difference is intentional.
+ defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
8795
+ (v1i1 (scalar_to_vector GR32:$mask)),
8796
+ v4f32x_info, fp32imm0, 0x01,
8797
+ (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8798
+ defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
8799
+ (v1i1 (scalar_to_vector GR8:$mask)),
8800
+ v4f32x_info, fp32imm0, 0x01,
8801
+ (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8802
+ defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
8803
+ (v1i1 (scalar_to_vector GR32:$mask)),
8804
+ v4f32x_info, fp32imm0, 0x02,
8805
+ (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8806
+ defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
8807
+ (v1i1 (scalar_to_vector GR8:$mask)),
8808
+ v4f32x_info, fp32imm0, 0x02,
8809
+ (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8810
+ defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
8811
+ (v1i1 (scalar_to_vector GR32:$mask)),
8812
+ v2f64x_info, fp64imm0, 0x01,
8813
+ (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8814
+ defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
8815
+ (v1i1 (scalar_to_vector GR8:$mask)),
8816
+ v2f64x_info, fp64imm0, 0x01,
8817
+ (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8818
+ defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
8819
+ (v1i1 (scalar_to_vector GR32:$mask)),
8820
+ v2f64x_info, fp64imm0, 0x02,
8821
+ (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8822
+ defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
8823
+ (v1i1 (scalar_to_vector GR8:$mask)),
8824
+ v2f64x_info, fp64imm0, 0x02,
8825
+ (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
8826
+
8827
+
8794
8828
//-------------------------------------------------
8795
8829
// Integer truncate and extend operations
8796
8830
//-------------------------------------------------
@@ -9936,10 +9970,18 @@ defm VGETMANTSS: avx512_common_fp_sae_scalar_imm<"vgetmantss", f32x_info,
9936
9970
let Predicates = [HasAVX512] in {
9937
9971
// v16f32 rounding patterns on VR512. ffloor, fnearbyint, fceil and frint all
// select VRNDSCALEPSZrri and differ only in the i32 immediate
// (0x9, 0xC, 0xA and 0x4 respectively).
// ffloor and fceil additionally have two vselect patterns under a VK16WM
// mask: when the other vselect operand is VR512:$dst the rrik form is
// selected with $dst as its first operand; when it is
// v16f32_info.ImmAllZerosV the rrikz form is selected with no $dst operand.
// fnearbyint and frint have no masked patterns in this block.
// NOTE(review): the immediates are presumably VRNDSCALE imm8 rounding-control
// encodings -- confirm against the Intel SDM.
def : Pat<(v16f32 (ffloor VR512:$src)),
9938
9972
(VRNDSCALEPSZrri VR512:$src, (i32 0x9))>;
9973
+ def : Pat<(v16f32 (vselect VK16WM:$mask, (ffloor VR512:$src), VR512:$dst)),
9974
+ (VRNDSCALEPSZrrik VR512:$dst, VK16WM:$mask, VR512:$src, (i32 0x9))>;
9975
+ def : Pat<(v16f32 (vselect VK16WM:$mask, (ffloor VR512:$src), v16f32_info.ImmAllZerosV)),
9976
+ (VRNDSCALEPSZrrikz VK16WM:$mask, VR512:$src, (i32 0x9))>;
9939
9977
def : Pat<(v16f32 (fnearbyint VR512:$src)),
9940
9978
(VRNDSCALEPSZrri VR512:$src, (i32 0xC))>;
9941
9979
def : Pat<(v16f32 (fceil VR512:$src)),
9942
9980
(VRNDSCALEPSZrri VR512:$src, (i32 0xA))>;
9981
+ def : Pat<(v16f32 (vselect VK16WM:$mask, (fceil VR512:$src), VR512:$dst)),
9982
+ (VRNDSCALEPSZrrik VR512:$dst, VK16WM:$mask, VR512:$src, (i32 0xA))>;
9983
+ def : Pat<(v16f32 (vselect VK16WM:$mask, (fceil VR512:$src), v16f32_info.ImmAllZerosV)),
9984
+ (VRNDSCALEPSZrrikz VK16WM:$mask, VR512:$src, (i32 0xA))>;
9943
9985
def : Pat<(v16f32 (frint VR512:$src)),
9944
9986
(VRNDSCALEPSZrri VR512:$src, (i32 0x4))>;
9945
9987
def : Pat<(v16f32 (ftrunc VR512:$src)),
@@ -9958,10 +10000,18 @@ def : Pat<(v16f32 (ftrunc (loadv16f32 addr:$src))),
9958
10000
9959
10001
// v8f64 rounding patterns on VR512. ffloor, fnearbyint, fceil and frint all
// select VRNDSCALEPDZrri and differ only in the i32 immediate
// (0x9, 0xC, 0xA and 0x4 respectively).
// ffloor and fceil additionally have two vselect patterns under a VK8WM
// mask: when the other vselect operand is VR512:$dst the rrik form is
// selected with $dst as its first operand; when it is
// v8f64_info.ImmAllZerosV the rrikz form is selected with no $dst operand.
// fnearbyint and frint have no masked patterns in this block.
// NOTE(review): the immediates are presumably VRNDSCALE imm8 rounding-control
// encodings -- confirm against the Intel SDM.
def : Pat<(v8f64 (ffloor VR512:$src)),
9960
10002
(VRNDSCALEPDZrri VR512:$src, (i32 0x9))>;
10003
+ def : Pat<(v8f64 (vselect VK8WM:$mask, (ffloor VR512:$src), VR512:$dst)),
10004
+ (VRNDSCALEPDZrrik VR512:$dst, VK8WM:$mask, VR512:$src, (i32 0x9))>;
10005
+ def : Pat<(v8f64 (vselect VK8WM:$mask, (ffloor VR512:$src), v8f64_info.ImmAllZerosV)),
10006
+ (VRNDSCALEPDZrrikz VK8WM:$mask, VR512:$src, (i32 0x9))>;
9961
10007
def : Pat<(v8f64 (fnearbyint VR512:$src)),
9962
10008
(VRNDSCALEPDZrri VR512:$src, (i32 0xC))>;
9963
10009
def : Pat<(v8f64 (fceil VR512:$src)),
9964
10010
(VRNDSCALEPDZrri VR512:$src, (i32 0xA))>;
10011
+ def : Pat<(v8f64 (vselect VK8WM:$mask, (fceil VR512:$src), VR512:$dst)),
10012
+ (VRNDSCALEPDZrrik VR512:$dst, VK8WM:$mask, VR512:$src, (i32 0xA))>;
10013
+ def : Pat<(v8f64 (vselect VK8WM:$mask, (fceil VR512:$src), v8f64_info.ImmAllZerosV)),
10014
+ (VRNDSCALEPDZrrikz VK8WM:$mask, VR512:$src, (i32 0xA))>;
9965
10015
def : Pat<(v8f64 (frint VR512:$src)),
9966
10016
(VRNDSCALEPDZrri VR512:$src, (i32 0x4))>;
9967
10017
def : Pat<(v8f64 (ftrunc VR512:$src)),
@@ -9982,10 +10032,18 @@ def : Pat<(v8f64 (ftrunc (loadv8f64 addr:$src))),
9982
10032
let Predicates = [HasVLX] in {
9983
10033
// v4f32 rounding patterns on VR128X. ffloor, fnearbyint, fceil and frint all
// select VRNDSCALEPSZ128rri and differ only in the i32 immediate
// (0x9, 0xC, 0xA and 0x4 respectively).
// ffloor and fceil additionally have two vselect patterns under a VK4WM
// mask: when the other vselect operand is VR128X:$dst the rrik form is
// selected with $dst as its first operand; when it is
// v4f32x_info.ImmAllZerosV the rrikz form is selected with no $dst operand.
// fnearbyint and frint have no masked patterns in this block.
// NOTE(review): the immediates are presumably VRNDSCALE imm8 rounding-control
// encodings -- confirm against the Intel SDM.
def : Pat<(v4f32 (ffloor VR128X:$src)),
9984
10034
(VRNDSCALEPSZ128rri VR128X:$src, (i32 0x9))>;
10035
+ def : Pat<(v4f32 (vselect VK4WM:$mask, (ffloor VR128X:$src), VR128X:$dst)),
10036
+ (VRNDSCALEPSZ128rrik VR128X:$dst, VK4WM:$mask, VR128X:$src, (i32 0x9))>;
10037
+ def : Pat<(v4f32 (vselect VK4WM:$mask, (ffloor VR128X:$src), v4f32x_info.ImmAllZerosV)),
10038
+ (VRNDSCALEPSZ128rrikz VK4WM:$mask, VR128X:$src, (i32 0x9))>;
9985
10039
def : Pat<(v4f32 (fnearbyint VR128X:$src)),
9986
10040
(VRNDSCALEPSZ128rri VR128X:$src, (i32 0xC))>;
9987
10041
def : Pat<(v4f32 (fceil VR128X:$src)),
9988
10042
(VRNDSCALEPSZ128rri VR128X:$src, (i32 0xA))>;
10043
+ def : Pat<(v4f32 (vselect VK4WM:$mask, (fceil VR128X:$src), VR128X:$dst)),
10044
+ (VRNDSCALEPSZ128rrik VR128X:$dst, VK4WM:$mask, VR128X:$src, (i32 0xA))>;
10045
+ def : Pat<(v4f32 (vselect VK4WM:$mask, (fceil VR128X:$src), v4f32x_info.ImmAllZerosV)),
10046
+ (VRNDSCALEPSZ128rrikz VK4WM:$mask, VR128X:$src, (i32 0xA))>;
9989
10047
def : Pat<(v4f32 (frint VR128X:$src)),
9990
10048
(VRNDSCALEPSZ128rri VR128X:$src, (i32 0x4))>;
9991
10049
def : Pat<(v4f32 (ftrunc VR128X:$src)),
@@ -10004,10 +10062,18 @@ def : Pat<(v4f32 (ftrunc (loadv4f32 addr:$src))),
10004
10062
10005
10063
// v2f64 rounding patterns on VR128X. ffloor, fnearbyint, fceil and frint all
// select VRNDSCALEPDZ128rri and differ only in the i32 immediate
// (0x9, 0xC, 0xA and 0x4 respectively).
// ffloor and fceil additionally have two vselect patterns under a VK2WM
// mask: when the other vselect operand is VR128X:$dst the rrik form is
// selected with $dst as its first operand; when it is
// v2f64x_info.ImmAllZerosV the rrikz form is selected with no $dst operand.
// fnearbyint and frint have no masked patterns in this block.
// NOTE(review): the immediates are presumably VRNDSCALE imm8 rounding-control
// encodings -- confirm against the Intel SDM.
def : Pat<(v2f64 (ffloor VR128X:$src)),
10006
10064
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0x9))>;
10065
+ def : Pat<(v2f64 (vselect VK2WM:$mask, (ffloor VR128X:$src), VR128X:$dst)),
10066
+ (VRNDSCALEPDZ128rrik VR128X:$dst, VK2WM:$mask, VR128X:$src, (i32 0x9))>;
10067
+ def : Pat<(v2f64 (vselect VK2WM:$mask, (ffloor VR128X:$src), v2f64x_info.ImmAllZerosV)),
10068
+ (VRNDSCALEPDZ128rrikz VK2WM:$mask, VR128X:$src, (i32 0x9))>;
10007
10069
def : Pat<(v2f64 (fnearbyint VR128X:$src)),
10008
10070
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0xC))>;
10009
10071
def : Pat<(v2f64 (fceil VR128X:$src)),
10010
10072
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0xA))>;
10073
+ def : Pat<(v2f64 (vselect VK2WM:$mask, (fceil VR128X:$src), VR128X:$dst)),
10074
+ (VRNDSCALEPDZ128rrik VR128X:$dst, VK2WM:$mask, VR128X:$src, (i32 0xA))>;
10075
+ def : Pat<(v2f64 (vselect VK2WM:$mask, (fceil VR128X:$src), v2f64x_info.ImmAllZerosV)),
10076
+ (VRNDSCALEPDZ128rrikz VK2WM:$mask, VR128X:$src, (i32 0xA))>;
10011
10077
def : Pat<(v2f64 (frint VR128X:$src)),
10012
10078
(VRNDSCALEPDZ128rri VR128X:$src, (i32 0x4))>;
10013
10079
def : Pat<(v2f64 (ftrunc VR128X:$src)),
@@ -10026,10 +10092,18 @@ def : Pat<(v2f64 (ftrunc (loadv2f64 addr:$src))),
10026
10092
10027
10093
// v8f32 rounding patterns on VR256X. ffloor, fnearbyint, fceil and frint all
// select VRNDSCALEPSZ256rri and differ only in the i32 immediate
// (0x9, 0xC, 0xA and 0x4 respectively).
// ffloor and fceil additionally have two vselect patterns under a VK8WM
// mask: when the other vselect operand is VR256X:$dst the rrik form is
// selected with $dst as its first operand; when it is
// v8f32x_info.ImmAllZerosV the rrikz form is selected with no $dst operand.
// fnearbyint and frint have no masked patterns in this block.
// NOTE(review): the immediates are presumably VRNDSCALE imm8 rounding-control
// encodings -- confirm against the Intel SDM.
def : Pat<(v8f32 (ffloor VR256X:$src)),
10028
10094
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0x9))>;
10095
+ def : Pat<(v8f32 (vselect VK8WM:$mask, (ffloor VR256X:$src), VR256X:$dst)),
10096
+ (VRNDSCALEPSZ256rrik VR256X:$dst, VK8WM:$mask, VR256X:$src, (i32 0x9))>;
10097
+ def : Pat<(v8f32 (vselect VK8WM:$mask, (ffloor VR256X:$src), v8f32x_info.ImmAllZerosV)),
10098
+ (VRNDSCALEPSZ256rrikz VK8WM:$mask, VR256X:$src, (i32 0x9))>;
10029
10099
def : Pat<(v8f32 (fnearbyint VR256X:$src)),
10030
10100
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0xC))>;
10031
10101
def : Pat<(v8f32 (fceil VR256X:$src)),
10032
10102
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0xA))>;
10103
+ def : Pat<(v8f32 (vselect VK8WM:$mask, (fceil VR256X:$src), VR256X:$dst)),
10104
+ (VRNDSCALEPSZ256rrik VR256X:$dst, VK8WM:$mask, VR256X:$src, (i32 0xA))>;
10105
+ def : Pat<(v8f32 (vselect VK8WM:$mask, (fceil VR256X:$src), v8f32x_info.ImmAllZerosV)),
10106
+ (VRNDSCALEPSZ256rrikz VK8WM:$mask, VR256X:$src, (i32 0xA))>;
10033
10107
def : Pat<(v8f32 (frint VR256X:$src)),
10034
10108
(VRNDSCALEPSZ256rri VR256X:$src, (i32 0x4))>;
10035
10109
def : Pat<(v8f32 (ftrunc VR256X:$src)),
@@ -10048,10 +10122,18 @@ def : Pat<(v8f32 (ftrunc (loadv8f32 addr:$src))),
10048
10122
10049
10123
// v4f64 rounding patterns on VR256X. ffloor, fnearbyint, fceil and frint all
// select VRNDSCALEPDZ256rri and differ only in the i32 immediate
// (0x9, 0xC, 0xA and 0x4 respectively).
// ffloor and fceil additionally have two vselect patterns under a VK4WM
// mask: when the other vselect operand is VR256X:$dst the rrik form is
// selected with $dst as its first operand; when it is
// v4f64x_info.ImmAllZerosV the rrikz form is selected with no $dst operand.
// fnearbyint and frint have no masked patterns in this block.
// NOTE(review): the immediates are presumably VRNDSCALE imm8 rounding-control
// encodings -- confirm against the Intel SDM.
def : Pat<(v4f64 (ffloor VR256X:$src)),
10050
10124
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0x9))>;
10125
+ def : Pat<(v4f64 (vselect VK4WM:$mask, (ffloor VR256X:$src), VR256X:$dst)),
10126
+ (VRNDSCALEPDZ256rrik VR256X:$dst, VK4WM:$mask, VR256X:$src, (i32 0x9))>;
10127
+ def : Pat<(v4f64 (vselect VK4WM:$mask, (ffloor VR256X:$src), v4f64x_info.ImmAllZerosV)),
10128
+ (VRNDSCALEPDZ256rrikz VK4WM:$mask, VR256X:$src, (i32 0x9))>;
10051
10129
def : Pat<(v4f64 (fnearbyint VR256X:$src)),
10052
10130
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0xC))>;
10053
10131
def : Pat<(v4f64 (fceil VR256X:$src)),
10054
10132
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0xA))>;
10133
+ def : Pat<(v4f64 (vselect VK4WM:$mask, (fceil VR256X:$src), VR256X:$dst)),
10134
+ (VRNDSCALEPDZ256rrik VR256X:$dst, VK4WM:$mask, VR256X:$src, (i32 0xA))>;
10135
+ def : Pat<(v4f64 (vselect VK4WM:$mask, (fceil VR256X:$src), v4f64x_info.ImmAllZerosV)),
10136
+ (VRNDSCALEPDZ256rrikz VK4WM:$mask, VR256X:$src, (i32 0xA))>;
10055
10137
def : Pat<(v4f64 (frint VR256X:$src)),
10056
10138
(VRNDSCALEPDZ256rri VR256X:$src, (i32 0x4))>;
10057
10139
def : Pat<(v4f64 (ftrunc VR256X:$src)),
0 commit comments