Index: include/llvm/CodeGen/SelectionDAGISel.h
===================================================================
--- include/llvm/CodeGen/SelectionDAGISel.h
+++ include/llvm/CodeGen/SelectionDAGISel.h
@@ -147,6 +147,8 @@
     OPC_CheckValueType,
     OPC_CheckComplexPat,
     OPC_CheckAndImm, OPC_CheckOrImm,
+    OPC_CheckImmAllOnesV,
+    OPC_CheckImmAllZerosV,
     OPC_CheckFoldableChainNode,
 
     OPC_EmitInteger,
Index: include/llvm/Target/TargetSelectionDAG.td
===================================================================
--- include/llvm/Target/TargetSelectionDAG.td
+++ include/llvm/Target/TargetSelectionDAG.td
@@ -782,14 +782,8 @@
 def vtInt      : PatLeaf<(vt),  [{ return N->getVT().isInteger(); }]>;
 def vtFP       : PatLeaf<(vt),  [{ return N->getVT().isFloatingPoint(); }]>;
 
-def immAllOnesV: PatLeaf<(build_vector), [{
-  return ISD::isBuildVectorAllOnes(N);
-}]>;
-def immAllZerosV: PatLeaf<(build_vector), [{
-  return ISD::isBuildVectorAllZeros(N);
-}]>;
-
-
+def immAllOnesV;  // ISD::isBuildVectorAllOnes
+def immAllZerosV; // ISD::isBuildVectorAllZeros
 
 // Other helper fragments.
 def not  : PatFrag<(ops node:$in), (xor node:$in, -1)>;
Index: lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -3379,6 +3379,12 @@
     case OPC_CheckOrImm:
       if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
       continue;
+    case OPC_CheckImmAllOnesV:
+      if (!ISD::isBuildVectorAllOnes(N.getNode())) break;
+      continue;
+    case OPC_CheckImmAllZerosV:
+      if (!ISD::isBuildVectorAllZeros(N.getNode())) break;
+      continue;
 
     case OPC_CheckFoldableChainNode: {
       assert(NodeStack.size() != 1 && "No parent node");
Index: lib/Target/SystemZ/SystemZOperators.td
===================================================================
--- lib/Target/SystemZ/SystemZOperators.td
+++ lib/Target/SystemZ/SystemZOperators.td
@@ -734,13 +734,13 @@
 // zeroed vector.
 class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
   : PatFrag<(ops node:$addr),
-            (z_vector_insert (immAllZerosV),
+            (z_vector_insert immAllZerosV,
                              (scalartype (load node:$addr)),
                              (i32 index))>;
 def z_vllezi8  : z_vllez;
 def z_vllezi16 : z_vllez;
 def z_vllezi32 : z_vllez;
 def z_vllezi64 : PatFrags<(ops node:$addr),
-                          [(z_vector_insert (immAllZerosV),
+                          [(z_vector_insert immAllZerosV,
                                             (i64 (load node:$addr)), (i32 0)),
                            (z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
 // We use high merges to form a v4f32 from four f32s.  Propagating zero
@@ -754,11 +754,11 @@
                            (v4f32 (scalar_to_vector (f32 (load node:$addr)))))))),
                          (v2i64
-                          (bitconvert (v4f32 (immAllZerosV)))))>;
+                          (bitconvert (v4f32 immAllZerosV))))>;
 def z_vllezf64 : PatFrag<(ops node:$addr),
                          (z_merge_high
                           (v2f64 (scalar_to_vector (f64 (load node:$addr)))),
-                          (immAllZerosV))>;
+                          immAllZerosV)>;
 
 // Similarly for the high element of a zeroed vector.
 def z_vllezli32 : z_vllez;
@@ -769,9 +769,9 @@
                            (z_merge_high
                             (v4f32 (scalar_to_vector (f32 (load node:$addr)))),
-                            (v4f32 (immAllZerosV))))),
+                            (v4f32 immAllZerosV)))),
                           (v2i64
-                           (bitconvert (v4f32 (immAllZerosV)))))>;
+                           (bitconvert (v4f32 immAllZerosV))))>;
 
 // Store one element of a vector.
 class z_vste
@@ -786,16 +786,16 @@
 def z_vstef64 : z_vste;
 
 // Arithmetic negation on vectors.
-def z_vneg : PatFrag<(ops node:$x), (sub (immAllZerosV), node:$x)>;
+def z_vneg : PatFrag<(ops node:$x), (sub immAllZerosV, node:$x)>;
 
 // Bitwise negation on vectors.
-def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (immAllOnesV))>;
+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, immAllOnesV)>;
 
 // Signed "integer greater than zero" on vectors.
-def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (immAllZerosV))>;
+def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, immAllZerosV)>;
 
 // Signed "integer less than zero" on vectors.
-def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (immAllZerosV), node:$x)>;
+def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph immAllZerosV, node:$x)>;
 
 // Integer absolute on vectors.
 class z_viabs
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -94,10 +94,7 @@
   RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
 
-  // A vector type of the same width with element type i32.  This is used to
-  // create the canonical constant zero node ImmAllZerosV.
-  ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
-  dag ImmAllZerosV = (VT (bitconvert (i32VT immAllZerosV)));
+  dag ImmAllZerosV = (VT immAllZerosV);
 
   string ZSuffix = !if (!eq (Size, 128), "Z128",
                    !if (!eq (Size, 256), "Z256", "Z"));
@@ -450,8 +447,8 @@
 def AVX512_512_SEXT_MASK_64 : I<0, Pseudo, (outs VR512:$dst),
                                 (ins VK8WM:$mask), "",
                                 [(set VR512:$dst, (vselect (v8i1 VK8WM:$mask),
-                                                  (bc_v8i64 (v16i32 immAllOnesV)),
-                                                  (bc_v8i64 (v16i32 immAllZerosV))))]>;
+                                                  (v8i64 immAllOnesV),
+                                                  (v8i64 immAllZerosV)))]>;
 }
 
 let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
@@ -1464,7 +1461,7 @@
 // Patterns for selects of bitcasted operations.
 def : Pat<(vselect VK16WM:$mask,
                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
-                   (bc_v16f32 (v16i32 immAllZerosV))),
+                   (v16f32 immAllZerosV)),
           (VBROADCASTF32X4rmkz VK16WM:$mask, addr:$src)>;
 def : Pat<(vselect VK16WM:$mask,
                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
@@ -1481,7 +1478,7 @@
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
-                   (bc_v8f64 (v16i32 immAllZerosV))),
+                   (v8f64 immAllZerosV)),
           (VBROADCASTF64X4rmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv8f32 addr:$src)))),
@@ -1489,7 +1486,7 @@
           (VBROADCASTF64X4rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
-                   (bc_v8i64 (v16i32 immAllZerosV))),
+                   (v8i64 immAllZerosV)),
           (VBROADCASTI64X4rmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv8i32 addr:$src)))),
@@ -1517,7 +1514,7 @@
 // Patterns for selects of bitcasted operations.
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
-                   (bc_v8f32 (v8i32 immAllZerosV))),
+                   (v8f32 immAllZerosV)),
           (VBROADCASTF32X4Z256rmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f32 (v4f64 (X86SubVBroadcast (loadv2f64 addr:$src)))),
@@ -1566,7 +1563,7 @@
 // Patterns for selects of bitcasted operations.
 def : Pat<(vselect VK4WM:$mask,
                    (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
-                   (bc_v4f64 (v8i32 immAllZerosV))),
+                   (v4f64 immAllZerosV)),
           (VBROADCASTF64X2Z128rmkz VK4WM:$mask, addr:$src)>;
 def : Pat<(vselect VK4WM:$mask,
                    (bc_v4f64 (v8f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
@@ -1574,7 +1571,7 @@
           (VBROADCASTF64X2Z128rmk VR256X:$src0, VK4WM:$mask, addr:$src)>;
 def : Pat<(vselect VK4WM:$mask,
                    (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
-                   (bc_v4i64 (v8i32 immAllZerosV))),
+                   (v4i64 immAllZerosV)),
           (VBROADCASTI64X2Z128rmkz VK4WM:$mask, addr:$src)>;
 def : Pat<(vselect VK4WM:$mask,
                    (bc_v4i64 (v8i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
@@ -1599,7 +1596,7 @@
 // Patterns for selects of bitcasted operations.
 def : Pat<(vselect VK16WM:$mask,
                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
-                   (bc_v16f32 (v16i32 immAllZerosV))),
+                   (v16f32 immAllZerosV)),
           (VBROADCASTF32X8rmkz VK16WM:$mask, addr:$src)>;
 def : Pat<(vselect VK16WM:$mask,
                    (bc_v16f32 (v8f64 (X86SubVBroadcast (loadv4f64 addr:$src)))),
@@ -1616,7 +1613,7 @@
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
-                   (bc_v8f64 (v16i32 immAllZerosV))),
+                   (v8f64 immAllZerosV)),
           (VBROADCASTF64X2rmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8f64 (v16f32 (X86SubVBroadcast (loadv4f32 addr:$src)))),
@@ -1624,7 +1621,7 @@
           (VBROADCASTF64X2rmk VR512:$src0, VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
-                   (bc_v8i64 (v16i32 immAllZerosV))),
+                   (v8i64 immAllZerosV)),
          (VBROADCASTI64X2rmkz VK8WM:$mask, addr:$src)>;
 def : Pat<(vselect VK8WM:$mask,
                    (bc_v8i64 (v16i32 (X86SubVBroadcast (loadv4i32 addr:$src)))),
@@ -3609,7 +3606,7 @@
                     "", []>, Sched<[WriteFStoreY]>;
 }
 
-def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
+def : Pat<(v8i64 (vselect VK8WM:$mask, (v8i64 immAllZerosV),
                   (v8i64 VR512:$src))),
   (VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)), VK8),
                   VR512:$src)>;
@@ -3621,7 +3618,7 @@
 // These patterns exist to prevent the above patterns from introducing a second
 // mask inversion when one already exists.
 def : Pat<(v8i64 (vselect (xor VK8:$mask, (v8i1 immAllOnesV)),
-                  (bc_v8i64 (v16i32 immAllZerosV)),
+                  (v8i64 immAllZerosV),
                   (v8i64 VR512:$src))),
   (VMOVDQA64Zrrkz VK8:$mask, VR512:$src)>;
 def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
@@ -4119,8 +4116,7 @@
 def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
-                                        (_.info512.VT (bitconvert
-                                                       (v16i32 immAllZerosV))))),
+                                        _.info512.ImmAllZerosV)),
                          (iPTR 0))),
           (!cast<Instruction>(InstrStr#rmkz)
                       (COPY_TO_REGCLASS MaskRC:$mask, VK1WM),
@@ -4145,8 +4141,7 @@
 def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask,
-                                        (_.info512.VT (bitconvert
-                                                       (v16i32 immAllZerosV))))),
+                                        _.info512.ImmAllZerosV)),
                          (iPTR 0))),
           (!cast<Instruction>(InstrStr#rmkz)
                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
@@ -4175,8 +4170,7 @@
 // AVX512F patterns.
 def : Pat<(_.info128.VT (extract_subvector
                          (_.info512.VT (masked_load addr:$srcAddr, Mask512,
-                                        (_.info512.VT (bitconvert
-                                                       (v16i32 immAllZerosV))))),
+                                        _.info512.ImmAllZerosV)),
                          (iPTR 0))),
           (!cast<Instruction>(InstrStr#rmkz)
                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
@@ -4194,7 +4188,7 @@
 // AVX512Vl patterns.
 def : Pat<(_.info128.VT (masked_load addr:$srcAddr, Mask128,
-                         (_.info128.VT (bitconvert (v4i32 immAllZerosV))))),
+                         _.info128.ImmAllZerosV)),
           (!cast<Instruction>(InstrStr#rmkz)
                       (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), MaskRC:$mask, subreg)), VK1WM),
                       addr:$srcAddr)>;
@@ -11674,21 +11668,21 @@
           (VMOVDDUPZ128rrk VR128X:$src0, VK2WM:$mask,
                            (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast f64:$src)),
-                   (bitconvert (v4i32 immAllZerosV))),
+                   immAllZerosV),
           (VMOVDDUPZ128rrkz VK2WM:$mask,
                             (v2f64 (COPY_TO_REGCLASS FR64X:$src, VR128X)))>;
 
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
                    (v2f64 VR128X:$src0)),
           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadf64 addr:$src))),
-                   (bitconvert (v4i32 immAllZerosV))),
+                   immAllZerosV),
           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
 
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
                    (v2f64 VR128X:$src0)),
           (VMOVDDUPZ128rmk VR128X:$src0, VK2WM:$mask, addr:$src)>;
 def : Pat<(vselect (v2i1 VK2WM:$mask), (v2f64 (X86VBroadcast (loadv2f64 addr:$src))),
-                   (bitconvert (v4i32 immAllZerosV))),
+                   immAllZerosV),
           (VMOVDDUPZ128rmkz VK2WM:$mask, addr:$src)>;
 }
@@ -12187,39 +12181,39 @@
 // TODO: We should maybe have a more generalized algorithm for folding to
 // vpternlog.
 let Predicates = [HasAVX512] in {
-  def : Pat<(xor VR512:$src, (bc_v64i8 (v16i32 immAllOnesV))),
+  def : Pat<(xor VR512:$src, (v64i8 immAllOnesV)),
             (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
-  def : Pat<(xor VR512:$src, (bc_v32i16 (v16i32 immAllOnesV))),
+  def : Pat<(xor VR512:$src, (v32i16 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
-  def : Pat<(xor VR512:$src, (bc_v16i32 (v16i32 immAllOnesV))),
+  def : Pat<(xor VR512:$src, (v16i32 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
-  def : Pat<(xor VR512:$src, (bc_v8i64 (v16i32 immAllOnesV))),
+  def : Pat<(xor VR512:$src, (v8i64 immAllOnesV)),
            (VPTERNLOGQZrri VR512:$src, VR512:$src, VR512:$src, (i8 15))>;
 }
 
 let Predicates = [HasAVX512, NoVLX] in {
-  def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
             (i8 15)), sub_xmm)>;
-  def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
           (EXTRACT_SUBREG
            (VPTERNLOGQZrri
            (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
           (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
         (i8 15)), sub_xmm)>;
-  def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
         (EXTRACT_SUBREG
          (VPTERNLOGQZrri
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
        (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
       (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
      (i8 15)), sub_xmm)>;
-  def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
     (EXTRACT_SUBREG
      (VPTERNLOGQZrri
       (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
@@ -12227,28 +12221,28 @@
       (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src,
        sub_xmm),
       (i8 15)), sub_xmm)>;
-  def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
            (EXTRACT_SUBREG
             (VPTERNLOGQZrri
              (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
             (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
             (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
             (i8 15)), sub_ymm)>;
-  def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
           (EXTRACT_SUBREG
            (VPTERNLOGQZrri
            (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
           (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
         (i8 15)), sub_ymm)>;
-  def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
         (EXTRACT_SUBREG
          (VPTERNLOGQZrri
          (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
        (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
       (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
      (i8 15)), sub_ymm)>;
-  def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
     (EXTRACT_SUBREG
      (VPTERNLOGQZrri
       (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
@@ -12258,22 +12252,22 @@
 }
 
 let Predicates = [HasVLX] in {
-  def : Pat<(xor VR128X:$src, (bc_v16i8 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v16i8 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
-  def : Pat<(xor VR128X:$src, (bc_v8i16 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v8i16 immAllOnesV)),
            (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
-  def : Pat<(xor VR128X:$src, (bc_v4i32 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v4i32 immAllOnesV)),
           (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
-  def : Pat<(xor VR128X:$src, (bc_v2i64 (v4i32 immAllOnesV))),
+  def : Pat<(xor VR128X:$src, (v2i64 immAllOnesV)),
          (VPTERNLOGQZ128rri VR128X:$src, VR128X:$src, VR128X:$src, (i8 15))>;
-  def : Pat<(xor VR256X:$src, (bc_v32i8 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v32i8 immAllOnesV)),
         (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
-  def : Pat<(xor VR256X:$src, (bc_v16i16 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v16i16 immAllOnesV)),
        (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
-  def : Pat<(xor VR256X:$src, (bc_v8i32 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v8i32 immAllOnesV)),
       (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
-  def : Pat<(xor VR256X:$src, (bc_v4i64 (v8i32 immAllOnesV))),
+  def : Pat<(xor VR256X:$src, (v4i64 immAllOnesV)),
      (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
 }
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -8295,7 +8295,7 @@
   def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), undef)),
            (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
   def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask),
-                (VT (bitconvert (ZeroVT immAllZerosV))))),
+                (VT immAllZerosV))),
            (!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
   def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), (VT RC:$src0))),
            (!cast<Instruction>(BlendStr#"rr")
Index: lib/Target/X86/X86InstrVecCompiler.td
===================================================================
--- lib/Target/X86/X86InstrVecCompiler.td
+++ lib/Target/X86/X86InstrVecCompiler.td
@@ -175,7 +175,7 @@
                                    RegisterClass RC, ValueType DstTy,
                                    ValueType SrcTy, ValueType ZeroTy,
                                    SubRegIndex SubIdx> {
-  def : Pat<(DstTy (insert_subvector (bitconvert (ZeroTy immAllZerosV)),
+  def : Pat<(DstTy (insert_subvector immAllZerosV,
                                      (SrcTy RC:$src), (iPTR 0))),
             (SUBREG_TO_REG (i64 0),
              (SrcTy (!cast<Instruction>("VMOV"#MoveStr#"rr") RC:$src)), SubIdx)>;
Index: utils/TableGen/CodeGenDAGPatterns.cpp
===================================================================
--- utils/TableGen/CodeGenDAGPatterns.cpp
+++ utils/TableGen/CodeGenDAGPatterns.cpp
@@ -1281,6 +1281,17 @@
 // PatternToMatch implementation
 //
 
+static bool isImmAllOnesAllZerosMatch(const TreePatternNode *P) {
+  if (!P->isLeaf())
+    return false;
+  DefInit *DI = dyn_cast<DefInit>(P->getLeafValue());
+  if (!DI)
+    return false;
+
+  Record *R = DI->getDef();
+  return R->getName() == "immAllOnesV" || R->getName() == "immAllZerosV";
+}
+
 /// getPatternSize - Return the 'size' of this pattern.  We want to match large
 /// patterns before small ones.  This is used to determine the size of a
 /// pattern.
@@ -1320,6 +1331,8 @@
       Size += 5;  // Matches a ConstantSDNode (+3) and a specific value (+2).
     else if (Child->getComplexPatternInfo(CGP))
       Size += getPatternSize(Child, CGP);
+    else if (isImmAllOnesAllZerosMatch(Child))
+      Size += 4;
     else if (!Child->getPredicateCalls().empty())
       ++Size;
   }
@@ -2126,7 +2139,8 @@
     }
 
     if (R->getName() == "node" || R->getName() == "srcvalue" ||
-        R->getName() == "zero_reg") {
+        R->getName() == "zero_reg" || R->getName() == "immAllOnesV" ||
+        R->getName() == "immAllZerosV") {
       // Placeholder.
       return TypeSetByHwMode(); // Unknown.
     }
Index: utils/TableGen/DAGISelMatcher.h
===================================================================
--- utils/TableGen/DAGISelMatcher.h
+++ utils/TableGen/DAGISelMatcher.h
@@ -70,6 +70,8 @@
     CheckComplexPat,
     CheckAndImm,
     CheckOrImm,
+    CheckImmAllOnesV,
+    CheckImmAllZerosV,
     CheckFoldableChainNode,
 
     // Node creation/emisssion.
@@ -124,6 +126,8 @@
     case CheckValueType:
     case CheckAndImm:
     case CheckOrImm:
+    case CheckImmAllOnesV:
+    case CheckImmAllZerosV:
     case CheckFoldableChainNode:
       return true;
     }
@@ -730,6 +734,36 @@
   }
 };
 
+/// CheckImmAllOnesVMatcher - This checks if the current node is a build
+/// vector of all ones.
+class CheckImmAllOnesVMatcher : public Matcher {
+public:
+  CheckImmAllOnesVMatcher() : Matcher(CheckImmAllOnesV) {}
+
+  static bool classof(const Matcher *N) {
+    return N->getKind() == CheckImmAllOnesV;
+  }
+
+private:
+  void printImpl(raw_ostream &OS, unsigned indent) const override;
+  bool isEqualImpl(const Matcher *M) const override { return true; }
+};
+
+/// CheckImmAllZerosVMatcher - This checks if the current node is a build
+/// vector of all zeros.
+class CheckImmAllZerosVMatcher : public Matcher {
+public:
+  CheckImmAllZerosVMatcher() : Matcher(CheckImmAllZerosV) {}
+
+  static bool classof(const Matcher *N) {
+    return N->getKind() == CheckImmAllZerosV;
+  }
+
+private:
+  void printImpl(raw_ostream &OS, unsigned indent) const override;
+  bool isEqualImpl(const Matcher *M) const override { return true; }
+};
+
 /// CheckFoldableChainNodeMatcher - This checks to see if the current node
 /// (which defines a chain operand) is safe to fold into a larger pattern.
 class CheckFoldableChainNodeMatcher : public Matcher {
Index: utils/TableGen/DAGISelMatcher.cpp
===================================================================
--- utils/TableGen/DAGISelMatcher.cpp
+++ utils/TableGen/DAGISelMatcher.cpp
@@ -232,6 +232,16 @@
   OS.indent(indent) << "CheckFoldableChainNode\n";
 }
 
+void CheckImmAllOnesVMatcher::printImpl(raw_ostream &OS,
+                                        unsigned indent) const {
+  OS.indent(indent) << "CheckAllOnesV\n";
+}
+
+void CheckImmAllZerosVMatcher::printImpl(raw_ostream &OS,
+                                         unsigned indent) const {
+  OS.indent(indent) << "CheckAllZerosV\n";
+}
+
 void EmitIntegerMatcher::printImpl(raw_ostream &OS, unsigned indent) const {
   OS.indent(indent) << "EmitInteger " << Val << " VT="
                     << getEnumName(VT) << '\n';
Index: utils/TableGen/DAGISelMatcherEmitter.cpp
===================================================================
--- utils/TableGen/DAGISelMatcherEmitter.cpp
+++ utils/TableGen/DAGISelMatcherEmitter.cpp
@@ -596,6 +596,14 @@
     OS << "OPC_CheckFoldableChainNode,\n";
     return 1;
 
+  case Matcher::CheckImmAllOnesV:
+    OS << "OPC_CheckImmAllOnesV,\n";
+    return 1;
+
+  case Matcher::CheckImmAllZerosV:
+    OS << "OPC_CheckImmAllZerosV,\n";
+    return 1;
+
   case Matcher::EmitInteger: {
     int64_t Val = cast<EmitIntegerMatcher>(N)->getValue();
     OS << "OPC_EmitInteger, "
@@ -1001,6 +1009,8 @@
   case Matcher::CheckOrImm: return "OPC_CheckOrImm"; break;
   case Matcher::CheckFoldableChainNode:
     return "OPC_CheckFoldableChainNode"; break;
+  case Matcher::CheckImmAllOnesV: return "OPC_CheckImmAllOnesV"; break;
+  case Matcher::CheckImmAllZerosV: return "OPC_CheckImmAllZerosV"; break;
  case Matcher::EmitInteger: return "OPC_EmitInteger"; break;
  case Matcher::EmitStringInteger: return "OPC_EmitStringInteger"; break;
  case Matcher::EmitRegister: return "OPC_EmitRegister"; break;
Index: utils/TableGen/DAGISelMatcherGen.cpp
===================================================================
--- utils/TableGen/DAGISelMatcherGen.cpp
+++ utils/TableGen/DAGISelMatcherGen.cpp
@@ -277,6 +277,11 @@
     return;
   }
 
+  if (LeafRec->getName() == "immAllOnesV")
+    return AddMatcher(new CheckImmAllOnesVMatcher());
+  if (LeafRec->getName() == "immAllZerosV")
+    return AddMatcher(new CheckImmAllZerosVMatcher());
+
   errs() << "Unknown leaf kind: " << *N << "\n";
   abort();
 }
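Example (illustration only, not part of the patch): with immAllOnesV and immAllZerosV now handled as plain typed leaves via OPC_CheckImmAllOnesV/OPC_CheckImmAllZerosV, a backend pattern can match an all-ones or all-zeros vector directly, without wrapping it in a bitconvert or bc_vNN fragment as the removed lines above did. The instruction name MYVNOTrr and register class VR128 below are hypothetical placeholders for a target whose vector NOT is a single instruction.

  // Hypothetical backend pattern: select "xor x, all-ones" as a vector NOT.
  // The generated matcher table checks the leaf with OPC_CheckImmAllOnesV
  // instead of expanding a build_vector of constants.
  def : Pat<(xor VR128:$src, (v4i32 immAllOnesV)),
            (MYVNOTrr VR128:$src)>;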