Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -304,6 +304,9 @@ void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal); + void loadVectorConstant(const SystemZVectorConstantInfo &VCI, + SDNode *Node); + // Try to use gather instruction Opcode to implement vector insertion N. bool tryGather(SDNode *N, unsigned Opcode); @@ -1132,6 +1135,35 @@ SelectCode(Or.getNode()); } +void SystemZDAGToDAGISel::loadVectorConstant( + const SystemZVectorConstantInfo &VCI, SDNode *Node) { + assert((VCI.Opcode == SystemZISD::BYTE_MASK || + VCI.Opcode == SystemZISD::REPLICATE || + VCI.Opcode == SystemZISD::ROTATE_MASK) && + "Bad opcode!"); + assert(VCI.VecVT.getSizeInBits() == 128 && "Expected a vector type"); + EVT VT = Node->getValueType(0); + SDLoc DL(Node); + SmallVector Ops; + for (unsigned OpVal : VCI.OpVals) + Ops.push_back(CurDAG->getConstant(OpVal, DL, MVT::i32)); + SDValue Op = CurDAG->getNode(VCI.Opcode, DL, VCI.VecVT, Ops); + + if (VCI.VecVT == VT.getSimpleVT()) + ReplaceNode(Node, Op.getNode()); + else if (VT.getSizeInBits() == 128) { + SDValue BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op); + ReplaceNode(Node, BitCast.getNode()); + SelectCode(BitCast.getNode()); + } else { // float or double + unsigned SubRegIdx = + (VT.getSizeInBits() == 32 ? 
SystemZ::subreg_h32 : SystemZ::subreg_h64); + ReplaceNode( + Node, CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op).getNode()); + } + SelectCode(Op.getNode()); +} + bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { SDValue ElemV = N->getOperand(2); auto *ElemN = dyn_cast(ElemV); @@ -1529,13 +1561,9 @@ case ISD::BUILD_VECTOR: { auto *BVN = cast(Node); - SDLoc DL(Node); - EVT VT = Node->getValueType(0); - uint64_t Mask = 0; - if (SystemZTargetLowering::tryBuildVectorByteMask(BVN, Mask)) { - SDNode *Res = CurDAG->getMachineNode(SystemZ::VGBM, DL, VT, - CurDAG->getTargetConstant(Mask, DL, MVT::i32)); - ReplaceNode(Node, Res); + SystemZVectorConstantInfo VCI(BVN); + if (VCI.isVectorConstantLegal(*Subtarget)) { + loadVectorConstant(VCI, Node); return; } break; @@ -1545,23 +1573,10 @@ APFloat Imm = cast(Node)->getValueAPF(); if (Imm.isZero() || Imm.isNegZero()) break; - const SystemZInstrInfo *TII = getInstrInfo(); - EVT VT = Node->getValueType(0); - unsigned Start, End; - unsigned BitWidth = VT.getSizeInBits(); - bool Success = SystemZTargetLowering::analyzeFPImm(Imm, BitWidth, Start, - End, static_cast(TII)); (void)Success; + SystemZVectorConstantInfo VCI(Imm); + bool Success = VCI.isVectorConstantLegal(*Subtarget); (void)Success; assert(Success && "Expected legal FP immediate"); - SDLoc DL(Node); - unsigned Opcode = (BitWidth == 32 ? SystemZ::VGMF : SystemZ::VGMG); - SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, - CurDAG->getTargetConstant(Start, DL, MVT::i32), - CurDAG->getTargetConstant(End, DL, MVT::i32)); - unsigned SubRegIdx = (BitWidth == 32 ? 
SystemZ::subreg_h32 - : SystemZ::subreg_h64); - Res = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SDValue(Res, 0)) - .getNode(); - ReplaceNode(Node, Res); + loadVectorConstant(VCI, Node); return; } Index: lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.h +++ lib/Target/SystemZ/SystemZISelLowering.h @@ -162,6 +162,10 @@ // Transaction end. Just the chain operand. Returns CC value and chain. TEND, + // Create a vector constant by filling byte N of the result with bit + // 15-N of the single operand. + BYTE_MASK, + // Create a vector constant by replicating an element-sized RISBG-style mask. // The first operand specifies the starting set bit and the second operand // specifies the ending set bit. Both operands count from the MSB of the @@ -513,9 +517,6 @@ return true; } - static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask); - static bool analyzeFPImm(const APFloat &Imm, unsigned BitWidth, - unsigned &Start, unsigned &End, const SystemZInstrInfo *TII); private: const SystemZSubtarget &Subtarget; @@ -643,6 +644,33 @@ const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; }; + +struct SystemZVectorConstantInfo { +private: + APFloat FPImm; + BuildVectorSDNode *BVN; + + bool isScalarFP() { return BVN == nullptr; } + bool isBVN() { return !isScalarFP(); } + +public: + unsigned Opcode; + SmallVector OpVals; + MVT VecVT; + SystemZVectorConstantInfo(APFloat f) : FPImm(f), BVN(nullptr), Opcode(0) {}; + SystemZVectorConstantInfo(BuildVectorSDNode *b) : FPImm(0.0), BVN(b), + Opcode(0) { + assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR"); + }; + bool isFP128() { + return isScalarFP() && (&FPImm.getSemantics() == &APFloat::IEEEquad()); + } + APInt getIntBits(); + void getSplat(APInt &SplatBits, APInt &SplatUndef, unsigned &SplatBitSize); + bool isVectorConstantLegal(const SystemZSubtarget &Subtarget); +}; + + } // end 
namespace llvm #endif Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -577,26 +577,127 @@ return false; } +// Return true if the constant can be generated with a vector instruction, +// such as VGM, VGBM or VREPI. +bool SystemZVectorConstantInfo::isVectorConstantLegal( + const SystemZSubtarget &Subtarget) { + if (!Subtarget.hasVector() || + (isFP128() && !Subtarget.hasVectorEnhancements1())) + return false; -// Return true if Imm can be generated with a vector instruction, such as VGM. -bool SystemZTargetLowering:: -analyzeFPImm(const APFloat &Imm, unsigned BitWidth, unsigned &Start, - unsigned &End, const SystemZInstrInfo *TII) { - APInt IntImm = Imm.bitcastToAPInt(); - if (IntImm.getActiveBits() > 64) + // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- + // preferred way of creating all-zero and all-one vectors so give it + // priority over other methods below. + unsigned Mask = 0; + APInt IntBits = getIntBits(); + unsigned I = 0; + for (; I < SystemZ::VectorBytes; ++I) { + uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue(); + if (Byte == 0xff) + Mask |= 1ULL << I; + else if (Byte != 0) + break; + } + if (I == SystemZ::VectorBytes) { + Opcode = SystemZISD::BYTE_MASK; + OpVals.push_back(Mask); + VecVT = MVT::getVectorVT(MVT::getIntegerVT(8), 16); + return true; + } + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + getSplat(SplatBits, SplatUndef, SplatBitSize); + if (SplatBitSize > 64) return false; - // See if this immediate could be generated with VGM. 
- bool Success = TII->isRxSBGMask(IntImm.getZExtValue(), BitWidth, Start, End); - if (!Success) + const SystemZInstrInfo *TII = + static_cast(Subtarget.getInstrInfo()); + + auto tryValue = [&](uint64_t Value) -> bool { + // Try VECTOR REPLICATE IMMEDIATE + int64_t SignedValue = SignExtend64(Value, SplatBitSize); + if (isInt<16>(SignedValue)) { + OpVals.push_back(((unsigned) SignedValue)); + Opcode = SystemZISD::REPLICATE; + VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), + SystemZ::VectorBits / SplatBitSize); + return true; + } + // Try VECTOR GENERATE MASK + unsigned Start, End; + if (TII->isRxSBGMask(Value, SplatBitSize, Start, End)) { + // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0 + // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for + // a SplatBitSize value, so that 0 denotes 1 << (SplatBitSize-1). + OpVals.push_back(Start - (64 - SplatBitSize)); + OpVals.push_back(End - (64 - SplatBitSize)); + Opcode = SystemZISD::ROTATE_MASK; + VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), + SystemZ::VectorBits / SplatBitSize); + return true; + } return false; - // isRxSBGMask returns the bit numbers for a full 64-bit value, - // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to - // bit numbers for an BitsPerElement value, so that 0 denotes - // 1 << (BitsPerElement-1). - Start -= 64 - BitWidth; - End -= 64 - BitWidth; - return true; + }; + + // First try assuming that any undefined bits above the highest set bit + // and below the lowest set bit are 1s. This increases the likelihood of + // being able to use a sign-extended element value in VECTOR REPLICATE + // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. 
+ uint64_t SplatBitsZ = SplatBits.getZExtValue(); + uint64_t SplatUndefZ = SplatUndef.getZExtValue(); + uint64_t Lower = + (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); + uint64_t Upper = + (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); + if (tryValue(SplatBitsZ | Upper | Lower)) + return true; + + // Now try assuming that any undefined bits between the first and + // last defined set bits are set. This increases the chances of + // using a non-wraparound mask. + uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; + return tryValue(SplatBitsZ | Middle); +} + +APInt SystemZVectorConstantInfo::getIntBits() { + if (isBVN()) { + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128, + true); + return SplatBits; + } + return FPImm.bitcastToAPInt().zextOrSelf(128); +} + +void SystemZVectorConstantInfo::getSplat(APInt &SplatBits, APInt &SplatUndef, + unsigned &SplatBitSize) { + if (isBVN()) { + bool HasAnyUndefs; + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 8, + true); + return; + } + + // Find the splat for the FP Constant. + SplatBits = FPImm.bitcastToAPInt(); + unsigned Width = SplatBits.getBitWidth(); + while (Width > 8) { + unsigned HalfSize = Width / 2; + APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize); + APInt LowValue = SplatBits.trunc(HalfSize); + + // If the two halves do not match, stop here. 
+ if (HighValue != LowValue || 8 > HalfSize) + break; + + SplatBits = HighValue; + Width = HalfSize; + } + SplatUndef = 0; + SplatBitSize = Width; } bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { @@ -604,12 +705,7 @@ if (Imm.isZero() || Imm.isNegZero()) return true; - if (!Subtarget.hasVector()) - return false; - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); - unsigned Start, End; - return analyzeFPImm(Imm, VT.getSizeInBits(), Start, End, TII); + return SystemZVectorConstantInfo(Imm).isVectorConstantLegal(Subtarget); } bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { @@ -4289,78 +4385,6 @@ return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); } -// Try to represent constant BUILD_VECTOR node BVN using a BYTE MASK style -// mask. Store the mask value in Mask on success. -bool SystemZTargetLowering:: -tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { - EVT ElemVT = BVN->getValueType(0).getVectorElementType(); - unsigned BytesPerElement = ElemVT.getStoreSize(); - for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { - SDValue Op = BVN->getOperand(I); - if (!Op.isUndef()) { - uint64_t Value; - if (Op.getOpcode() == ISD::Constant) - Value = cast(Op)->getZExtValue(); - else if (Op.getOpcode() == ISD::ConstantFP) - Value = (cast(Op)->getValueAPF().bitcastToAPInt() - .getZExtValue()); - else - return false; - for (unsigned J = 0; J < BytesPerElement; ++J) { - uint64_t Byte = (Value >> (J * 8)) & 0xff; - if (Byte == 0xff) - Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J); - else if (Byte != 0) - return false; - } - } - } - return true; -} - -// Try to load a vector constant in which BitsPerElement-bit value Value -// is replicated to fill the vector. VT is the type of the resulting -// constant, which may have elements of a different size from BitsPerElement. -// Return the SDValue of the constant on success, otherwise return -// an empty value. 
-static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, - const SystemZInstrInfo *TII, - const SDLoc &DL, EVT VT, uint64_t Value, - unsigned BitsPerElement) { - // Signed 16-bit values can be replicated using VREPI. - // Mark the constants as opaque or DAGCombiner will convert back to - // BUILD_VECTOR. - int64_t SignedValue = SignExtend64(Value, BitsPerElement); - if (isInt<16>(SignedValue)) { - MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), - SystemZ::VectorBits / BitsPerElement); - SDValue Op = DAG.getNode( - SystemZISD::REPLICATE, DL, VecVT, - DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); - } - // See whether rotating the constant left some N places gives a value that - // is one less than a power of 2 (i.e. all zeros followed by all ones). - // If so we can use VGM. - unsigned Start, End; - if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) { - // isRxSBGMask returns the bit numbers for a full 64-bit value, - // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to - // bit numbers for an BitsPerElement value, so that 0 denotes - // 1 << (BitsPerElement-1). - Start -= 64 - BitsPerElement; - End -= 64 - BitsPerElement; - MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), - SystemZ::VectorBits / BitsPerElement); - SDValue Op = DAG.getNode( - SystemZISD::ROTATE_MASK, DL, VecVT, - DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/), - DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); - } - return SDValue(); -} - // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for // the non-EXTRACT_VECTOR_ELT elements. 
See if the given BUILD_VECTOR @@ -4561,55 +4585,14 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); auto *BVN = cast(Op.getNode()); SDLoc DL(Op); EVT VT = Op.getValueType(); if (BVN->isConstant()) { - // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- - // preferred way of creating all-zero and all-one vectors so give it - // priority over other methods below. - uint64_t Mask; - if (ISD::isBuildVectorAllZeros(Op.getNode()) || - ISD::isBuildVectorAllOnes(Op.getNode()) || - (VT.isInteger() && tryBuildVectorByteMask(BVN, Mask))) + if (SystemZVectorConstantInfo(BVN).isVectorConstantLegal(Subtarget)) return Op; - // Try using some form of replication. - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, - 8, true) && - SplatBitSize <= 64) { - // First try assuming that any undefined bits above the highest set bit - // and below the lowest set bit are 1s. This increases the likelihood of - // being able to use a sign-extended element value in VECTOR REPLICATE - // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. - uint64_t SplatBitsZ = SplatBits.getZExtValue(); - uint64_t SplatUndefZ = SplatUndef.getZExtValue(); - uint64_t Lower = (SplatUndefZ - & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); - uint64_t Upper = (SplatUndefZ - & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); - uint64_t Value = SplatBitsZ | Upper | Lower; - SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, - SplatBitSize); - if (Op.getNode()) - return Op; - - // Now try assuming that any undefined bits between the first and - // last defined set bits are set. This increases the chances of - // using a non-wraparound mask. 
- uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; - Value = SplatBitsZ | Middle; - Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize); - if (Op.getNode()) - return Op; - } - // Fall back to loading it from memory. return SDValue(); } @@ -5055,6 +5038,7 @@ OPCODE(TBEGIN); OPCODE(TBEGIN_NOFLOAT); OPCODE(TEND); + OPCODE(BYTE_MASK); OPCODE(ROTATE_MASK); OPCODE(REPLICATE); OPCODE(JOIN_DWORDS); Index: lib/Target/SystemZ/SystemZInstrVector.td =================================================================== --- lib/Target/SystemZ/SystemZInstrVector.td +++ lib/Target/SystemZ/SystemZInstrVector.td @@ -60,7 +60,7 @@ // Generate byte mask. def VZERO : InherentVRIa<"vzero", 0xE744, 0>; def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; - def VGBM : UnaryVRIa<"vgbm", 0xE744, null_frag, v128b, imm32zx16>; + def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; // Generate mask. def VGM : BinaryVRIbGeneric<"vgm", 0xE746>; Index: lib/Target/SystemZ/SystemZOperators.td =================================================================== --- lib/Target/SystemZ/SystemZOperators.td +++ lib/Target/SystemZ/SystemZOperators.td @@ -286,6 +286,7 @@ SDT_ZInsertVectorElt>; def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", SDT_ZExtractVectorElt>; +def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>; def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>; def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>; def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; Index: test/CodeGen/SystemZ/fp-const-11.ll =================================================================== --- test/CodeGen/SystemZ/fp-const-11.ll +++ test/CodeGen/SystemZ/fp-const-11.ll @@ -38,3 +38,33 @@ store fp128 0xL00000000000000003fff000002000000, fp128 *%x ret void } + +; Test that VGBM works. 
+define void @f4(fp128 *%x) { +; CHECK-LABEL: f4: +; CHECK: vgbm %v0, 21845 +; CHECK-NEXT: vst %v0, 0(%r2) +; CHECK-NEXT: br %r14 + store fp128 0xL00ff00ff00ff00ff00ff00ff00ff00ff, fp128 *%x + ret void +} + +; Test that VREPI works. +define void @f5(fp128 *%x) { +; CHECK-LABEL: f5: +; CHECK: vrepib %v0, -8 +; CHECK-NEXT: vst %v0, 0(%r2) +; CHECK-NEXT: br %r14 + store fp128 0xLf8f8f8f8f8f8f8f8f8f8f8f8f8f8f8f8, fp128 *%x + ret void +} + +; Test that VGM works. +define void @f6(fp128 *%x) { +; CHECK-LABEL: f6: +; CHECK: vgmg %v0, 12, 31 +; CHECK-NEXT: vst %v0, 0(%r2) +; CHECK-NEXT: br %r14 + store fp128 0xL000fffff00000000000fffff00000000, fp128 *%x + ret void +} Index: test/CodeGen/SystemZ/vec-const-05.ll =================================================================== --- test/CodeGen/SystemZ/vec-const-05.ll +++ test/CodeGen/SystemZ/vec-const-05.ll @@ -1,28 +1,63 @@ -; Test vector byte masks, v4f32 version. Only all-zero vectors are handled. +; Test vector byte masks, v4f32 version. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test an all-zeros vector. -define <4 x float> @f0() { -; CHECK-LABEL: f0: +define <4 x float> @f1() { +; CHECK-LABEL: f1: ; CHECK: vgbm %v24, 0 ; CHECK: br %r14 ret <4 x float> zeroinitializer } -; Test that undefs are treated as zero. -define <4 x float> @f1() { -; CHECK-LABEL: f1: -; CHECK: vgbm %v24, 0 +; Test an all-ones vector. +define <4 x float> @f2() { +; CHECK-LABEL: f2: +; CHECK: vgbm %v24, 65535 ; CHECK: br %r14 - ret <4 x float> + ret <4 x float> +} + +; Test a mixed vector (mask 0xc731). +define <4 x float> @f3() { +; CHECK-LABEL: f3: +; CHECK: vgbm %v24, 50993 +; CHECK: br %r14 + ret <4 x float> +} + +; Test that undefs are treated as zero (mask 0xc031). +define <4 x float> @f4() { +; CHECK-LABEL: f4: +; CHECK: vgbm %v24, 49201 +; CHECK: br %r14 + ret <4 x float> +} + +; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. 
+define <4 x float> @f5() { +; CHECK-LABEL: f5: +; CHECK-NOT: vgbm +; CHECK: br %r14 + ret <4 x float> } ; Test an all-zeros v2f32 that gets promoted to v4f32. -define <2 x float> @f2() { -; CHECK-LABEL: f2: +define <2 x float> @f6() { +; CHECK-LABEL: f6: ; CHECK: vgbm %v24, 0 ; CHECK: br %r14 ret <2 x float> zeroinitializer } + +; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700). +define <2 x float> @f7() { +; CHECK-LABEL: f7: +; CHECK: vgbm %v24, 50944 +; CHECK: br %r14 + ret <2 x float> +} Index: test/CodeGen/SystemZ/vec-const-06.ll =================================================================== --- test/CodeGen/SystemZ/vec-const-06.ll +++ test/CodeGen/SystemZ/vec-const-06.ll @@ -1,19 +1,43 @@ -; Test vector byte masks, v2f64 version. Only all-zero vectors are handled. +; Test vector byte masks, v2f64 version. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test an all-zeros vector. -define <2 x double> @f0() { -; CHECK-LABEL: f0: +define <2 x double> @f1() { +; CHECK-LABEL: f1: ; CHECK: vgbm %v24, 0 ; CHECK: br %r14 ret <2 x double> zeroinitializer } -; Test that undefs are treated as zero. -define <2 x double> @f1() { -; CHECK-LABEL: f1: -; CHECK: vgbm %v24, 0 +; Test an all-ones vector. +define <2 x double> @f2() { +; CHECK-LABEL: f2: +; CHECK: vgbm %v24, 65535 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a mixed vector (mask 0x8c76). +define <2 x double> @f3() { +; CHECK-LABEL: f3: +; CHECK: vgbm %v24, 35958 +; CHECK: br %r14 + ret <2 x double> +} + +; Test that undefs are treated as zero (mask 0x8c00). +define <2 x double> @f4() { +; CHECK-LABEL: f4: +; CHECK: vgbm %v24, 35840 +; CHECK: br %r14 + ret <2 x double> +} + +; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. 
+define <2 x double> @f5() { +; CHECK-LABEL: f5: +; CHECK-NOT: vgbm ; CHECK: br %r14 - ret <2 x double> + ret <2 x double> } Index: test/CodeGen/SystemZ/vec-const-19.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/vec-const-19.ll @@ -0,0 +1,18 @@ +; Test that a scalar FP constant can be reused from a vector splat constant +; of the same value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +define void @fun() { +; CHECK-LABEL: fun: +; CHECK: vgmg %v0, 2, 10 +; CHECK-NOT: vgmg %v0, 2, 10 + + %tmp = fadd <2 x double> zeroinitializer, + %tmp1 = fmul <2 x double> %tmp, + store <2 x double> %tmp1, <2 x double>* undef + %tmp2 = load double, double* undef + %tmp3 = fmul double %tmp2, 5.000000e-01 + store double %tmp3, double* undef + ret void +}