Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "SystemZISelLowering.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" @@ -1526,6 +1527,20 @@ break; } + case ISD::BUILD_VECTOR: { + auto *BVN = cast(Node); + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + uint64_t Mask = 0; + if (SystemZTargetLowering::tryBuildVectorByteMask(BVN, Mask)) { + SDNode *Res = CurDAG->getMachineNode(SystemZ::VGBM, DL, VT, + CurDAG->getTargetConstant(Mask, DL, MVT::i32)); + ReplaceNode(Node, Res); + return; + } + break; + } + case ISD::STORE: { if (tryFoldLoadStoreIntoMemOperand(Node)) return; Index: lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.h +++ lib/Target/SystemZ/SystemZISelLowering.h @@ -161,10 +161,6 @@ // Transaction end. Just the chain operand. Returns CC value and chain. TEND, - // Create a vector constant by filling byte N of the result with bit - // 15-N of the single operand. - BYTE_MASK, - // Create a vector constant by replicating an element-sized RISBG-style mask. // The first operand specifies the starting set bit and the second operand // specifies the ending set bit. Both operands count from the MSB of the @@ -515,6 +511,9 @@ return true; } + static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask, + uint64_t *UndefMask = nullptr); + private: const SystemZSubtarget &Subtarget; Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2510,9 +2510,8 @@ break; } if (Invert) { - SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(65535, DL, MVT::i32)); - Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask); + SDValue Mask = + DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64)); Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); } return Cmp; @@ -3330,14 +3329,14 @@ break; } case 32: { - SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(0, DL, MVT::i32)); + SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, + DAG.getConstant(0, DL, MVT::i32)); Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); break; } case 64: { - SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(0, DL, MVT::i32)); + SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, + DAG.getConstant(0, DL, MVT::i32)); Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); break; @@ -4259,10 +4258,11 @@ return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); } -// Try to represent constant BUILD_VECTOR node BVN using a -// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask -// on success. -static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { +// Try to represent constant BUILD_VECTOR node BVN using a BYTE MASK style +// mask. Store the mask value in Mask on success. +bool SystemZTargetLowering:: +tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask, + uint64_t *UndefMask) { EVT ElemVT = BVN->getValueType(0).getVectorElementType(); unsigned BytesPerElement = ElemVT.getStoreSize(); for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { @@ -4283,6 +4283,9 @@ else if (Byte != 0) return false; } + } else if (UndefMask != nullptr) { + for (unsigned J = 0; J < BytesPerElement; ++J) + *UndefMask |= 1ULL << ((E - I - 1) * BytesPerElement + J); } } return true; @@ -4542,11 +4545,24 @@ // preferred way of creating all-zero and all-one vectors so give it // priority over other methods below. uint64_t Mask = 0; - if (tryBuildVectorByteMask(BVN, Mask)) { - SDValue Op = DAG.getNode( - SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); + uint64_t UndefMask = 0; + if (tryBuildVectorByteMask(BVN, Mask, &UndefMask)) { + if (VT.isInteger()) + return Op; + + // Floating point: build a new integer BUILD_VECTOR with all-ones, + // all-zeros or undef elements. + SmallVector Constants; + for (unsigned I = SystemZ::VectorBytes - 1; I + 1 != 0; --I) { + if (UndefMask & 1ULL << I) + Constants.push_back(DAG.getUNDEF(MVT::i32)); + else if (Mask & 1ULL << I) + Constants.push_back(DAG.getConstant(-1, DL, MVT::i32)); + else + Constants.push_back(DAG.getConstant(0, DL, MVT::i32)); + } + SDValue BVInt = DAG.getBuildVector(MVT::v16i8, DL, Constants); + return DAG.getNode(ISD::BITCAST, DL, VT, BVInt); } // Try using some form of replication. @@ -5027,7 +5043,6 @@ OPCODE(TBEGIN); OPCODE(TBEGIN_NOFLOAT); OPCODE(TEND); - OPCODE(BYTE_MASK); OPCODE(ROTATE_MASK); OPCODE(REPLICATE); OPCODE(JOIN_DWORDS); @@ -5339,8 +5354,7 @@ SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() == ISD::BITCAST) Op0 = Op0.getOperand(0); - if (Op0.getOpcode() == SystemZISD::BYTE_MASK && - cast(Op0.getOperand(0))->getZExtValue() == 0) { + if (ISD::isBuildVectorAllZeros(Op0.getNode())) { // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF // for v4f32. if (Op1 == N->getOperand(0)) Index: lib/Target/SystemZ/SystemZInstrVector.td =================================================================== --- lib/Target/SystemZ/SystemZInstrVector.td +++ lib/Target/SystemZ/SystemZInstrVector.td @@ -60,7 +60,7 @@ // Generate byte mask. def VZERO : InherentVRIa<"vzero", 0xE744, 0>; def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; - def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; + def VGBM : UnaryVRIa<"vgbm", 0xE744, null_frag, v128b, imm32zx16>; // Generate mask. def VGM : BinaryVRIbGeneric<"vgm", 0xE746>; Index: lib/Target/SystemZ/SystemZOperators.td =================================================================== --- lib/Target/SystemZ/SystemZOperators.td +++ lib/Target/SystemZ/SystemZOperators.td @@ -286,7 +286,6 @@ SDT_ZInsertVectorElt>; def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDT_ZExtractVectorElt>; -def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>; def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>; def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>; def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; @@ -709,8 +708,10 @@ (operator node:$val, (and node:$count, imm32bottom6set))]>; // Vector representation of all-zeros and all-ones. -def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>; -def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>; +def z_vzero : PatFrags<(ops), [(immAllZerosV), + (bitconvert (v16i8 (immAllZerosV)))]>; +def z_vones : PatFrags<(ops), [(immAllOnesV), + (bitconvert (v16i8 (immAllOnesV)))]>; // Load a scalar and replicate it in all elements of a vector. class z_replicate_load Index: test/CodeGen/SystemZ/buildvector-00.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/buildvector-00.ll @@ -0,0 +1,36 @@ +; Test that the dag combiner can understand that some vector operands are +; all-zeros and then optimize the logical operations. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +define void @f1() { +; CHECK-LABEL: f1: +; CHECK: vno +; CHECK-NOT: vno + +bb: + %tmp = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer + br label %bb1 + +bb1: ; preds = %bb + %tmp2 = load i64, i64* undef, align 8 + %tmp3 = insertelement <2 x i64> undef, i64 %tmp2, i32 1 + %tmp4 = icmp ne <2 x i64> undef, zeroinitializer + %tmp5 = xor <2 x i1> %tmp4, zeroinitializer + %tmp6 = xor <2 x i1> zeroinitializer, %tmp5 + %tmp7 = and <2 x i64> %tmp3, %tmp + %tmp8 = icmp ne <2 x i64> %tmp7, zeroinitializer + %tmp9 = xor <2 x i1> zeroinitializer, %tmp8 + %tmp10 = icmp ne <2 x i64> undef, zeroinitializer + %tmp11 = xor <2 x i1> %tmp10, %tmp9 + %tmp12 = and <2 x i1> %tmp6, %tmp11 + %tmp13 = extractelement <2 x i1> %tmp12, i32 0 + br i1 %tmp13, label %bb14, label %bb15 + +bb14: ; preds = %bb1 + store i64 undef, i64* undef, align 8 + br label %bb15 + +bb15: ; preds = %bb14, %bb1 + unreachable +}