Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SystemZTargetMachine.h"
+#include "SystemZISelLowering.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Support/Debug.h"
@@ -1526,6 +1527,20 @@
     break;
   }
 
+  case ISD::BUILD_VECTOR: {
+    auto *BVN = cast<BuildVectorSDNode>(Node);
+    SDLoc DL(Node);
+    EVT VT = Node->getValueType(0);
+    uint64_t Mask = 0;
+    if (SystemZTargetLowering::tryBuildVectorByteMask(BVN, Mask)) {
+      SDNode *Res = CurDAG->getMachineNode(SystemZ::VGBM, DL, VT,
+                        CurDAG->getTargetConstant(Mask, DL, MVT::i32));
+      ReplaceNode(Node, Res);
+      return;
+    }
+    break;
+  }
+
   case ISD::STORE: {
     if (tryFoldLoadStoreIntoMemOperand(Node))
       return;
Index: lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.h
+++ lib/Target/SystemZ/SystemZISelLowering.h
@@ -161,10 +161,6 @@
   // Transaction end. Just the chain operand. Returns CC value and chain.
   TEND,
 
-  // Create a vector constant by filling byte N of the result with bit
-  // 15-N of the single operand.
-  BYTE_MASK,
-
   // Create a vector constant by replicating an element-sized RISBG-style mask.
   // The first operand specifies the starting set bit and the second operand
   // specifies the ending set bit. Both operands count from the MSB of the
@@ -515,6 +511,8 @@
     return true;
   }
 
+  static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask);
+
 private:
   const SystemZSubtarget &Subtarget;
Index: lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- lib/Target/SystemZ/SystemZISelLowering.cpp
+++ lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -2510,9 +2510,8 @@
       break;
     }
     if (Invert) {
-      SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                                 DAG.getConstant(65535, DL, MVT::i32));
-      Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+      SDValue Mask =
+          DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
      Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
    }
    return Cmp;
@@ -3330,14 +3329,14 @@
      break;
    }
    case 32: {
-      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                                DAG.getConstant(0, DL, MVT::i32));
+      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
+                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
    }
    case 64: {
-      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                                DAG.getConstant(0, DL, MVT::i32));
+      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
+                                            DAG.getConstant(0, DL, MVT::i32));
      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
      break;
@@ -4259,10 +4258,10 @@
   return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
 }
 
-// Try to represent constant BUILD_VECTOR node BVN using a
-// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask
-// on success.
-static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
+// Try to represent constant BUILD_VECTOR node BVN using a BYTE MASK style
+// mask. Store the mask value in Mask on success.
+bool SystemZTargetLowering::
+tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
   EVT ElemVT = BVN->getValueType(0).getVectorElementType();
   unsigned BytesPerElement = ElemVT.getStoreSize();
   for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
@@ -4541,13 +4540,11 @@
   // Try using VECTOR GENERATE BYTE MASK. This is the architecturally-
   // preferred way of creating all-zero and all-one vectors so give it
   // priority over other methods below.
-  uint64_t Mask = 0;
-  if (tryBuildVectorByteMask(BVN, Mask)) {
-    SDValue Op = DAG.getNode(
-        SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-        DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/));
-    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-  }
+  uint64_t Mask;
+  if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
+      ISD::isBuildVectorAllOnes(Op.getNode()) ||
+      (VT.isInteger() && tryBuildVectorByteMask(BVN, Mask)))
+    return Op;
 
   // Try using some form of replication.
   APInt SplatBits, SplatUndef;
@@ -5027,7 +5024,6 @@
     OPCODE(TBEGIN);
     OPCODE(TBEGIN_NOFLOAT);
     OPCODE(TEND);
-    OPCODE(BYTE_MASK);
     OPCODE(ROTATE_MASK);
     OPCODE(REPLICATE);
     OPCODE(JOIN_DWORDS);
@@ -5339,8 +5335,7 @@
   SDValue Op1 = N->getOperand(1);
   if (Op0.getOpcode() == ISD::BITCAST)
     Op0 = Op0.getOperand(0);
-  if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
-      cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+  if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
     // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF
     // for v4f32.
     if (Op1 == N->getOperand(0))
Index: lib/Target/SystemZ/SystemZInstrVector.td
===================================================================
--- lib/Target/SystemZ/SystemZInstrVector.td
+++ lib/Target/SystemZ/SystemZInstrVector.td
@@ -60,7 +60,7 @@
   // Generate byte mask.
   def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
   def VONE  : InherentVRIa<"vone", 0xE744, 0xffff>;
-  def VGBM  : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
+  def VGBM  : UnaryVRIa<"vgbm", 0xE744, null_frag, v128b, imm32zx16>;
 
   // Generate mask.
   def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
Index: lib/Target/SystemZ/SystemZOperators.td
===================================================================
--- lib/Target/SystemZ/SystemZOperators.td
+++ lib/Target/SystemZ/SystemZOperators.td
@@ -286,7 +286,6 @@
                                               SDT_ZInsertVectorElt>;
 def z_vector_extract    : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                                  SDT_ZExtractVectorElt>;
-def z_byte_mask         : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>;
 def z_rotate_mask       : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>;
 def z_replicate         : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>;
 def z_join_dwords       : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>;
@@ -708,10 +707,6 @@
                             [(operator node:$val, node:$count),
                              (operator node:$val, (and node:$count,
                                                        imm32bottom6set))]>;
-// Vector representation of all-zeros and all-ones.
-def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
-def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
-
 // Load a scalar and replicate it in all elements of a vector.
 class z_replicate_load<ValueType scalartype, SDPatternOperator load>
   : PatFrag<(ops node:$addr),
@@ -739,13 +734,13 @@
 // zeroed vector.
 class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
   : PatFrag<(ops node:$addr),
-            (z_vector_insert (z_vzero),
+            (z_vector_insert (immAllZerosV),
                              (scalartype (load node:$addr)), (i32 index))>;
 def z_vllezi8  : z_vllez<i32, anyextloadi8, 7>;
 def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
 def z_vllezi32 : z_vllez<i32, load, 1>;
 def z_vllezi64 : PatFrags<(ops node:$addr),
-                          [(z_vector_insert (z_vzero),
+                          [(z_vector_insert (immAllZerosV),
                                             (i64 (load node:$addr)), (i32 0)),
                            (z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
 // We use high merges to form a v4f32 from four f32s. Propagating zero
@@ -758,11 +753,12 @@
                             (bitconvert
                              (v4f32 (scalar_to_vector
                                      (f32 (load node:$addr)))))))),
-                           (v2i64 (z_vzero)))>;
+                           (v2i64
+                            (bitconvert (v4f32 (immAllZerosV)))))>;
 def z_vllezf64 : PatFrag<(ops node:$addr),
                          (z_merge_high
                           (v2f64 (scalar_to_vector
                                   (f64 (load node:$addr)))),
-                          (z_vzero))>;
+                          (immAllZerosV))>;
 // Similarly for the high element of a zeroed vector.
 def z_vllezli32 : z_vllez<i32, load, 0>;
@@ -773,8 +769,9 @@
                           (z_merge_high
                            (v4f32 (scalar_to_vector
                                    (f32 (load node:$addr)))),
-                           (v4f32 (z_vzero))))),
+                           (v4f32 (immAllZerosV))))),
-                          (v2i64 (z_vzero)))>;
+                          (v2i64
+                           (bitconvert (v4f32 (immAllZerosV)))))>;
 
 // Store one element of a vector.
 class z_vste<ValueType scalartype, SDPatternOperator store>
@@ -789,16 +786,16 @@
 def z_vstef64 : z_vste<f64, store>;
 
 // Arithmetic negation on vectors.
-def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
+def z_vneg : PatFrag<(ops node:$x), (sub (immAllZerosV), node:$x)>;
 
 // Bitwise negation on vectors.
-def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>;
+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (immAllOnesV))>;
 
 // Signed "integer greater than zero" on vectors.
-def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>;
+def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (immAllZerosV))>;
 
 // Signed "integer less than zero" on vectors.
-def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>;
+def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (immAllZerosV), node:$x)>;
 
 // Integer absolute on vectors.
 class z_viabs<int shift>
Index: test/CodeGen/SystemZ/buildvector-00.ll
===================================================================
--- /dev/null
+++ test/CodeGen/SystemZ/buildvector-00.ll
@@ -0,0 +1,36 @@
+; Test that the DAG combiner can understand that some vector operands are
+; all-zeros and then optimize the logical operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+define void @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vno
+; CHECK-NOT: vno
+
+bb:
+  %tmp = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer
+  br label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = load i64, i64* undef, align 8
+  %tmp3 = insertelement <2 x i64> undef, i64 %tmp2, i32 1
+  %tmp4 = icmp ne <2 x i64> undef, zeroinitializer
+  %tmp5 = xor <2 x i1> %tmp4, zeroinitializer
+  %tmp6 = xor <2 x i1> zeroinitializer, %tmp5
+  %tmp7 = and <2 x i64> %tmp3, %tmp
+  %tmp8 = icmp ne <2 x i64> %tmp7, zeroinitializer
+  %tmp9 = xor <2 x i1> zeroinitializer, %tmp8
+  %tmp10 = icmp ne <2 x i64> undef, zeroinitializer
+  %tmp11 = xor <2 x i1> %tmp10, %tmp9
+  %tmp12 = and <2 x i1> %tmp6, %tmp11
+  %tmp13 = extractelement <2 x i1> %tmp12, i32 0
+  br i1 %tmp13, label %bb14, label %bb15
+
+bb14:                                             ; preds = %bb1
+  store i64 undef, i64* undef, align 8
+  br label %bb15
+
+bb15:                                             ; preds = %bb14, %bb1
+  unreachable
+}
Index: test/CodeGen/SystemZ/vec-const-05.ll
===================================================================
--- test/CodeGen/SystemZ/vec-const-05.ll
+++ test/CodeGen/SystemZ/vec-const-05.ll
@@ -1,63 +1,28 @@
-; Test vector byte masks, v4f32 version.
+; Test vector byte masks, v4f32 version. Only all-zero vectors are handled.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test an all-zeros vector.
-define <4 x float> @f1() {
-; CHECK-LABEL: f1:
+define <4 x float> @f0() {
+; CHECK-LABEL: f0:
 ; CHECK: vgbm %v24, 0
 ; CHECK: br %r14
   ret <4 x float> zeroinitializer
 }
 
-; Test an all-ones vector.
-define <4 x float> @f2() {
-; CHECK-LABEL: f2:
-; CHECK: vgbm %v24, 65535
-; CHECK: br %r14
-  ret <4 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
-                   float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000>
-}
-
-; Test a mixed vector (mask 0xc731).
-define <4 x float> @f3() {
-; CHECK-LABEL: f3:
-; CHECK: vgbm %v24, 50993
-; CHECK: br %r14
-  ret <4 x float>
-}
-
-; Test that undefs are treated as zero (mask 0xc031).
-define <4 x float> @f4() {
-; CHECK-LABEL: f4:
-; CHECK: vgbm %v24, 49201
-; CHECK: br %r14
-  ret <4 x float>
-}
-
-; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-define <4 x float> @f5() {
-; CHECK-LABEL: f5:
-; CHECK-NOT: vgbm
+; Test that undefs are treated as zero.
+define <4 x float> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
 ; CHECK: br %r14
-  ret <4 x float>
+  ret <4 x float> <float 0.0, float undef,
+                   float 0.0, float undef>
 }
 
 ; Test an all-zeros v2f32 that gets promoted to v4f32.
-define <2 x float> @f6() {
-; CHECK-LABEL: f6:
+define <2 x float> @f2() {
+; CHECK-LABEL: f2:
 ; CHECK: vgbm %v24, 0
 ; CHECK: br %r14
   ret <2 x float> zeroinitializer
 }
-
-; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700).
-define <2 x float> @f7() {
-; CHECK-LABEL: f7:
-; CHECK: vgbm %v24, 50944
-; CHECK: br %r14
-  ret <2 x float>
-}
Index: test/CodeGen/SystemZ/vec-const-06.ll
===================================================================
--- test/CodeGen/SystemZ/vec-const-06.ll
+++ test/CodeGen/SystemZ/vec-const-06.ll
@@ -1,43 +1,19 @@
-; Test vector byte masks, v2f64 version.
+; Test vector byte masks, v2f64 version. Only all-zero vectors are handled.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test an all-zeros vector.
-define <2 x double> @f1() {
-; CHECK-LABEL: f1:
+define <2 x double> @f0() {
+; CHECK-LABEL: f0:
 ; CHECK: vgbm %v24, 0
 ; CHECK: br %r14
   ret <2 x double> zeroinitializer
 }
 
-; Test an all-ones vector.
-define <2 x double> @f2() {
-; CHECK-LABEL: f2:
-; CHECK: vgbm %v24, 65535
-; CHECK: br %r14
-  ret <2 x double> <double 0xFFFFFFFFFFFFFFFF, double 0xFFFFFFFFFFFFFFFF>
-}
-
-; Test a mixed vector (mask 0x8c76).
-define <2 x double> @f3() {
-; CHECK-LABEL: f3:
-; CHECK: vgbm %v24, 35958
-; CHECK: br %r14
-  ret <2 x double>
-}
-
-; Test that undefs are treated as zero (mask 0x8c00).
-define <2 x double> @f4() {
-; CHECK-LABEL: f4:
-; CHECK: vgbm %v24, 35840
-; CHECK: br %r14
-  ret <2 x double>
-}
-
-; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-define <2 x double> @f5() {
-; CHECK-LABEL: f5:
-; CHECK-NOT: vgbm
+; Test that undefs are treated as zero.
+define <2 x double> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
 ; CHECK: br %r14
-  ret <2 x double>
+  ret <2 x double> <double 0.0, double undef>
 }
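
Note (not part of the patch): the VGBM immediate that tryBuildVectorByteMask
computes follows the encoding described by the removed BYTE_MASK comment:
every byte of the 16-byte vector constant must be 0x00 or 0xff, and byte N of
the vector corresponds to bit 15-N of the 16-bit immediate. A minimal
standalone C++ sketch of that check, with a hypothetical helper name:

    #include <array>
    #include <cstdint>
    #include <optional>

    // Return the VGBM immediate for a 16-byte constant, or std::nullopt
    // if some byte is neither 0x00 nor 0xff.
    std::optional<uint16_t> vgbmMaskFor(const std::array<uint8_t, 16> &Bytes) {
      uint16_t Mask = 0;
      for (unsigned I = 0; I != 16; ++I) {
        if (Bytes[I] == 0xff)
          Mask |= uint16_t(1) << (15 - I);  // byte N -> immediate bit 15-N
        else if (Bytes[I] != 0x00)
          return std::nullopt;              // not representable by VGBM
      }
      return Mask;
    }

Under this scheme an all-zeros vector yields mask 0 (VZERO) and an all-ones
vector yields mask 65535 (VONE), matching the values checked in the updated
tests.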