Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "SystemZTargetMachine.h" +#include "SystemZISelLowering.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Support/Debug.h" @@ -1526,6 +1527,21 @@ break; } + case ISD::BUILD_VECTOR: { + auto *BVN = cast<BuildVectorSDNode>(Node); + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + uint64_t Mask = 0; + if (SystemZTargetLowering::tryBuildVectorByteMask(BVN, Mask)) { + SDNode *Res = CurDAG->getMachineNode(SystemZ::VGBM, DL, VT, + CurDAG->getTargetConstant(Mask, DL, MVT::i32)); + ReplaceNode(Node, Res); + return; + } + + break; + } + case ISD::STORE: { if (tryFoldLoadStoreIntoMemOperand(Node)) return; Index: lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.h +++ lib/Target/SystemZ/SystemZISelLowering.h @@ -161,10 +161,6 @@ // Transaction end. Just the chain operand. Returns CC value and chain. TEND, - // Create a vector constant by filling byte N of the result with bit - // 15-N of the single operand. - BYTE_MASK, - // Create a vector constant by replicating an element-sized RISBG-style mask. // The first operand specifies the starting set bit and the second operand // specifies the ending set bit. 
Both operands count from the MSB of the @@ -515,6 +511,8 @@ return true; } + static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask); + private: const SystemZSubtarget &Subtarget; Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -2510,9 +2510,8 @@ break; } if (Invert) { - SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(65535, DL, MVT::i32)); - Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask); + SDValue Mask = + DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, VT.getScalarType())); Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); } return Cmp; @@ -3330,14 +3329,14 @@ break; } case 32: { - SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(0, DL, MVT::i32)); + SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, + DAG.getConstant(0, DL, MVT::i8)); Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); break; } case 64: { - SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(0, DL, MVT::i32)); + SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL, + DAG.getConstant(0, DL, MVT::i8)); Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); break; @@ -4259,10 +4258,10 @@ return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); } -// Try to represent constant BUILD_VECTOR node BVN using a -// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask -// on success. -static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { +// Try to represent constant BUILD_VECTOR node BVN using a BYTE MASK style +// mask. Store the mask value in Mask on success. 
+bool SystemZTargetLowering:: +tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { EVT ElemVT = BVN->getValueType(0).getVectorElementType(); unsigned BytesPerElement = ElemVT.getStoreSize(); for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { @@ -4271,7 +4270,8 @@ uint64_t Value; if (Op.getOpcode() == ISD::Constant) Value = cast<ConstantSDNode>(Op)->getZExtValue(); - else if (Op.getOpcode() == ISD::ConstantFP) + else if (Op.getOpcode() == ISD::ConstantFP || + Op.getOpcode() == ISD::TargetConstantFP) Value = (cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt() .getZExtValue()); else @@ -4529,6 +4529,15 @@ return Result; } +static bool isAllTargetConstantFPBuildVector(BuildVectorSDNode *BVN) { + for (unsigned I = 0; I < BVN->getNumOperands(); ++I) { + SDValue OpI = BVN->getOperand(I); + if (!OpI.isUndef() && OpI->getOpcode() != ISD::TargetConstantFP) + return false; + } + return true; +} + SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { const SystemZInstrInfo *TII = @@ -4537,16 +4546,34 @@ SDLoc DL(Op); EVT VT = Op.getValueType(); + if (isAllTargetConstantFPBuildVector(BVN)) + // This is the TargetConstantFP BUILD_VECTOR created below for VGBM; keep it. + return Op; + if (BVN->isConstant()) { // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- // preferred way of creating all-zero and all-one vectors so give it - // priority over other methods below. + // priority over other methods below. Use tryBuildVectorByteMask() in + // order to find out if this is suitable for VGBM. Integer BUILD_VECTORs + // can be kept as they are, but FP constants must be turned into + // TargetConstantFP or they will end up in the constant pool. 
uint64_t Mask = 0; if (tryBuildVectorByteMask(BVN, Mask)) { - SDValue Op = DAG.getNode( - SystemZISD::BYTE_MASK, DL, MVT::v16i8, - DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); + if (VT.isInteger()) + return Op; + + unsigned NumElts = BVN->getNumOperands(); + SmallVector<SDValue, 16> Constants(NumElts, SDValue()); + for (unsigned I = 0; I < NumElts; ++I) { + SDValue OpI = BVN->getOperand(I); + if (OpI.isUndef()) { + Constants[I] = DAG.getUNDEF(VT.getScalarType()); + continue; + } + const ConstantFP *Val=cast<ConstantFPSDNode>(OpI)->getConstantFPValue(); + Constants[I] = DAG.getTargetConstantFP(*Val, DL, VT.getScalarType()); + } + return DAG.getBuildVector(VT, DL, Constants); } // Try using some form of replication. @@ -5027,7 +5054,6 @@ OPCODE(TBEGIN); OPCODE(TBEGIN_NOFLOAT); OPCODE(TEND); - OPCODE(BYTE_MASK); OPCODE(ROTATE_MASK); OPCODE(REPLICATE); OPCODE(JOIN_DWORDS); @@ -5339,8 +5365,7 @@ SDValue Op1 = N->getOperand(1); if (Op0.getOpcode() == ISD::BITCAST) Op0 = Op0.getOperand(0); - if (Op0.getOpcode() == SystemZISD::BYTE_MASK && - cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) { + if (ISD::isBuildVectorAllZeros(Op0.getNode())) { // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF // for v4f32. if (Op1 == N->getOperand(0)) Index: lib/Target/SystemZ/SystemZInstrVector.td =================================================================== --- lib/Target/SystemZ/SystemZInstrVector.td +++ lib/Target/SystemZ/SystemZInstrVector.td @@ -60,7 +60,7 @@ // Generate byte mask. def VZERO : InherentVRIa<"vzero", 0xE744, 0>; def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; - def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; + def VGBM : UnaryVRIa<"vgbm", 0xE744, null_frag, v128b, imm32zx16>; // Generate mask. 
def VGM : BinaryVRIbGeneric<"vgm", 0xE746>; Index: lib/Target/SystemZ/SystemZOperators.td =================================================================== --- lib/Target/SystemZ/SystemZOperators.td +++ lib/Target/SystemZ/SystemZOperators.td @@ -286,7 +286,6 @@ SDT_ZInsertVectorElt>; def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDT_ZExtractVectorElt>; -def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>; def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>; def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>; def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; @@ -709,8 +708,8 @@ (operator node:$val, (and node:$count, imm32bottom6set))]>; // Vector representation of all-zeros and all-ones. -def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>; -def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>; +def z_vzero : PatFrag<(ops), (immAllZerosV)>; +def z_vones : PatFrag<(ops), (immAllOnesV)>; // Load a scalar and replicate it in all elements of a vector. class z_replicate_load @@ -758,7 +757,7 @@ (bitconvert (v4f32 (scalar_to_vector (f32 (load node:$addr)))))))), - (v2i64 (z_vzero)))>; + (v2i64 (bitconvert (v4f32 z_vzero))))>; def z_vllezf64 : PatFrag<(ops node:$addr), (z_merge_high (v2f64 (scalar_to_vector (f64 (load node:$addr)))), @@ -774,7 +773,7 @@ (v4f32 (scalar_to_vector (f32 (load node:$addr)))), (v4f32 (z_vzero))))), - (v2i64 (z_vzero)))>; + (v2i64 (bitconvert (v4f32 z_vzero))))>; // Store one element of a vector. class z_vste Index: test/CodeGen/SystemZ/buildvector-00.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/buildvector-00.ll @@ -0,0 +1,36 @@ +; Test that the dag combiner can understand that some vector operands are +; all-zeros and then optimize the logical operations. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +define void @f1() { +; CHECK-LABEL: f1: +; CHECK: vno +; CHECK-NOT: vno + +bb: + %tmp = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer + br label %bb1 + +bb1: ; preds = %bb + %tmp2 = load i64, i64* undef, align 8 + %tmp3 = insertelement <2 x i64> undef, i64 %tmp2, i32 1 + %tmp4 = icmp ne <2 x i64> undef, zeroinitializer + %tmp5 = xor <2 x i1> %tmp4, zeroinitializer + %tmp6 = xor <2 x i1> zeroinitializer, %tmp5 + %tmp7 = and <2 x i64> %tmp3, %tmp + %tmp8 = icmp ne <2 x i64> %tmp7, zeroinitializer + %tmp9 = xor <2 x i1> zeroinitializer, %tmp8 + %tmp10 = icmp ne <2 x i64> undef, zeroinitializer + %tmp11 = xor <2 x i1> %tmp10, %tmp9 + %tmp12 = and <2 x i1> %tmp6, %tmp11 + %tmp13 = extractelement <2 x i1> %tmp12, i32 0 + br i1 %tmp13, label %bb14, label %bb15 + +bb14: ; preds = %bb1 + store i64 undef, i64* undef, align 8 + br label %bb15 + +bb15: ; preds = %bb14, %bb1 + unreachable +}