Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -304,6 +304,9 @@ void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal); + // Try to load a vector constant with a REPLICATE or ROTATE_MASK. + bool tryReplicateConstantSplat(BuildVectorSDNode *BVN); + // Try to use gather instruction Opcode to implement vector insertion N. bool tryGather(SDNode *N, unsigned Opcode); @@ -1132,6 +1135,43 @@ SelectCode(Or.getNode()); } +bool SystemZDAGToDAGISel::tryReplicateConstantSplat(BuildVectorSDNode *BVN) { + const SystemZInstrInfo *TII = getInstrInfo(); + int64_t ReplicatedImm; + unsigned RotateStart, RotateEnd; + MVT VecVT; + if (!SystemZTargetLowering::analyzeBVNForConstantReplication( + BVN, ReplicatedImm, RotateStart, RotateEnd, VecVT, TII)) + return false; + + SDLoc DL(BVN); + EVT VT = BVN->getValueType(0); + SDValue Op; + SDValue BitCast; + if (ReplicatedImm != INT64_MAX) { + Op = CurDAG->getNode(SystemZISD::REPLICATE, DL, VecVT, + CurDAG->getConstant(ReplicatedImm, DL, MVT::i32, false, + true /*isOpaque*/)); + BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op); + } else { + Op = CurDAG->getNode( + SystemZISD::ROTATE_MASK, DL, VecVT, + CurDAG->getConstant(RotateStart, DL, MVT::i32, false, + true /*isOpaque*/), + CurDAG->getConstant(RotateEnd, DL, MVT::i32, false, true /*isOpaque*/)); + BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op); + } + + ReplaceNode(BVN, BitCast.getNode()); + SelectCode(BitCast.getNode()); + if (Op != BitCast) { + assert(!Op.use_empty() && "Expected bitcasted SDValue to remain in DAG"); + SelectCode(Op.getNode()); + } + + return true; +} + bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { SDValue ElemV = N->getOperand(2); auto *ElemN = dyn_cast(ElemV); @@ -1538,6 +1578,8 @@ ReplaceNode(Node, Res); return; } + if (tryReplicateConstantSplat(BVN)) + return; break; } Index: lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.h +++ lib/Target/SystemZ/SystemZISelLowering.h @@ -516,6 +516,11 @@ static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask); static bool analyzeFPImm(const APFloat &Imm, unsigned BitWidth, unsigned &Start, unsigned &End, const SystemZInstrInfo *TII); + static bool analyzeBVNForConstantReplication(BuildVectorSDNode *BVN, + int64_t &ReplicatedImm, + unsigned &RotateStart, + unsigned &RotateEnd, MVT &VecVT, + const SystemZInstrInfo *TII); private: const SystemZSubtarget &Subtarget; Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -4318,49 +4318,6 @@ return true; } -// Try to load a vector constant in which BitsPerElement-bit value Value -// is replicated to fill the vector. VT is the type of the resulting -// constant, which may have elements of a different size from BitsPerElement. -// Return the SDValue of the constant on success, otherwise return -// an empty value. -static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, - const SystemZInstrInfo *TII, - const SDLoc &DL, EVT VT, uint64_t Value, - unsigned BitsPerElement) { - // Signed 16-bit values can be replicated using VREPI. - // Mark the constants as opaque or DAGCombiner will convert back to - // BUILD_VECTOR. - int64_t SignedValue = SignExtend64(Value, BitsPerElement); - if (isInt<16>(SignedValue)) { - MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), - SystemZ::VectorBits / BitsPerElement); - SDValue Op = DAG.getNode( - SystemZISD::REPLICATE, DL, VecVT, - DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); - } - // See whether rotating the constant left some N places gives a value that - // is one less than a power of 2 (i.e. all zeros followed by all ones). - // If so we can use VGM. - unsigned Start, End; - if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) { - // isRxSBGMask returns the bit numbers for a full 64-bit value, - // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to - // bit numbers for an BitsPerElement value, so that 0 denotes - // 1 << (BitsPerElement-1). - Start -= 64 - BitsPerElement; - End -= 64 - BitsPerElement; - MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), - SystemZ::VectorBits / BitsPerElement); - SDValue Op = DAG.getNode( - SystemZISD::ROTATE_MASK, DL, VecVT, - DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/), - DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); - } - return SDValue(); -} - // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR @@ -4559,10 +4516,60 @@ return Result; } +// Return true if BVN holds a vector constant splat which can be loaded with +// a REPLICATE or ROTATE_MASK. ReplicatedImm is then the value to use with +// REPLICATE, or INT64_MAX in which case RotateStart and RotateEnd hold the +// values for ROTATE_MASK. VecVT is the type of the resulting constant, +// which may have elements of a different size from the BVN elements. +bool SystemZTargetLowering::analyzeBVNForConstantReplication( + BuildVectorSDNode *BVN, int64_t &ReplicatedImm, unsigned &RotateStart, + unsigned &RotateEnd, MVT &VecVT, const SystemZInstrInfo *TII) { + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (!(BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, + 8, true) && + SplatBitSize <= 64)) + return false; + VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), + SystemZ::VectorBits / SplatBitSize); + ReplicatedImm = INT64_MAX; + auto tryValue = [&](uint64_t Value) -> bool { + int64_t SignedValue = SignExtend64(Value, SplatBitSize); + if (isInt<16>(SignedValue)) { + ReplicatedImm = SignedValue; + return true; + } + if (TII->isRxSBGMask(Value, SplatBitSize, RotateStart, RotateEnd)) { + RotateStart -= 64 - SplatBitSize; + RotateEnd -= 64 - SplatBitSize; + return true; + } + return false; + }; + + // First try assuming that any undefined bits above the highest set bit + // and below the lowest set bit are 1s. This increases the likelihood of + // being able to use a sign-extended element value in VECTOR REPLICATE + // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. + uint64_t SplatBitsZ = SplatBits.getZExtValue(); + uint64_t SplatUndefZ = SplatUndef.getZExtValue(); + uint64_t Lower = + (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); + uint64_t Upper = + (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); + if (tryValue(SplatBitsZ | Upper | Lower)) + return true; + + // Now try assuming that any undefined bits between the first and + // last defined set bits are set. This increases the chances of + // using a non-wraparound mask. + uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; + return tryValue(SplatBitsZ | Middle); +} + SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); auto *BVN = cast(Op.getNode()); SDLoc DL(Op); EVT VT = Op.getValueType(); @@ -4578,37 +4585,14 @@ return Op; // Try using some form of replication. - APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, - 8, true) && - SplatBitSize <= 64) { - // First try assuming that any undefined bits above the highest set bit - // and below the lowest set bit are 1s. This increases the likelihood of - // being able to use a sign-extended element value in VECTOR REPLICATE - // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. - uint64_t SplatBitsZ = SplatBits.getZExtValue(); - uint64_t SplatUndefZ = SplatUndef.getZExtValue(); - uint64_t Lower = (SplatUndefZ - & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); - uint64_t Upper = (SplatUndefZ - & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); - uint64_t Value = SplatBitsZ | Upper | Lower; - SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, - SplatBitSize); - if (Op.getNode()) - return Op; - - // Now try assuming that any undefined bits between the first and - // last defined set bits are set. This increases the chances of - // using a non-wraparound mask. - uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; - Value = SplatBitsZ | Middle; - Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize); - if (Op.getNode()) - return Op; - } + const SystemZInstrInfo *TII = + static_cast(Subtarget.getInstrInfo()); + int64_t ReplicatedImm; + unsigned RotateStart, RotateEnd; + MVT VecVT; + if (analyzeBVNForConstantReplication(BVN, ReplicatedImm, RotateStart, + RotateEnd, VecVT, TII)) + return Op; // Fall back to loading it from memory. return SDValue(); Index: test/CodeGen/SystemZ/vec-const-11.ll =================================================================== --- test/CodeGen/SystemZ/vec-const-11.ll +++ /dev/null @@ -1,189 +0,0 @@ -; Test vector replicates, v4f32 version. -; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - -; Test a byte-granularity replicate with the lowest useful value. -define <4 x float> @f1() { -; CHECK-LABEL: f1: -; CHECK: vrepib %v24, 1 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a byte-granularity replicate with an arbitrary value. -define <4 x float> @f2() { -; CHECK-LABEL: f2: -; CHECK: vrepib %v24, -55 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a byte-granularity replicate with the highest useful value. -define <4 x float> @f3() { -; CHECK-LABEL: f3: -; CHECK: vrepib %v24, -2 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a halfword-granularity replicate with the lowest useful value. -define <4 x float> @f4() { -; CHECK-LABEL: f4: -; CHECK: vrepih %v24, 1 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a halfword-granularity replicate with an arbitrary value. -define <4 x float> @f5() { -; CHECK-LABEL: f5: -; CHECK: vrepih %v24, 25650 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a halfword-granularity replicate with the highest useful value. -define <4 x float> @f6() { -; CHECK-LABEL: f6: -; CHECK: vrepih %v24, -2 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a word-granularity replicate with the lowest useful positive value. -define <4 x float> @f7() { -; CHECK-LABEL: f7: -; CHECK: vrepif %v24, 1 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a word-granularity replicate with the highest in-range value. -define <4 x float> @f8() { -; CHECK-LABEL: f8: -; CHECK: vrepif %v24, 32767 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a word-granularity replicate with the next highest value. -; This cannot use VREPIF. -define <4 x float> @f9() { -; CHECK-LABEL: f9: -; CHECK-NOT: vrepif -; CHECK: br %r14 - ret <4 x float> -} - -; Test a word-granularity replicate with the lowest in-range value. -define <4 x float> @f10() { -; CHECK-LABEL: f10: -; CHECK: vrepif %v24, -32768 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a word-granularity replicate with the next lowest value. -; This cannot use VREPIF. -define <4 x float> @f11() { -; CHECK-LABEL: f11: -; CHECK-NOT: vrepif -; CHECK: br %r14 - ret <4 x float> -} - -; Test a word-granularity replicate with the highest useful negative value. -define <4 x float> @f12() { -; CHECK-LABEL: f12: -; CHECK: vrepif %v24, -2 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a doubleword-granularity replicate with the lowest useful positive -; value. -define <4 x float> @f13() { -; CHECK-LABEL: f13: -; CHECK: vrepig %v24, 1 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a doubleword-granularity replicate with the highest in-range value. -define <4 x float> @f14() { -; CHECK-LABEL: f14: -; CHECK: vrepig %v24, 32767 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a doubleword-granularity replicate with the next highest value. -; This cannot use VREPIG. -define <4 x float> @f15() { -; CHECK-LABEL: f15: -; CHECK-NOT: vrepig -; CHECK: br %r14 - ret <4 x float> -} - -; Test a doubleword-granularity replicate with the lowest in-range value. -define <4 x float> @f16() { -; CHECK-LABEL: f16: -; CHECK: vrepig %v24, -32768 -; CHECK: br %r14 - ret <4 x float> -} - -; Test a doubleword-granularity replicate with the next lowest value. -; This cannot use VREPIG. -define <4 x float> @f17() { -; CHECK-LABEL: f17: -; CHECK-NOT: vrepig -; CHECK: br %r14 - ret <4 x float> -} - -; Test a doubleword-granularity replicate with the highest useful negative -; value. -define <4 x float> @f18() { -; CHECK-LABEL: f18: -; CHECK: vrepig %v24, -2 -; CHECK: br %r14 - ret <4 x float> -} - -; Repeat f14 with undefs optimistically treated as 0, 32767. -define <4 x float> @f19() { -; CHECK-LABEL: f19: -; CHECK: vrepig %v24, 32767 -; CHECK: br %r14 - ret <4 x float> -} - -; Repeat f18 with undefs optimistically treated as -2, -1. -define <4 x float> @f20() { -; CHECK-LABEL: f20: -; CHECK: vrepig %v24, -2 -; CHECK: br %r14 - ret <4 x float> -} Index: test/CodeGen/SystemZ/vec-const-12.ll =================================================================== --- test/CodeGen/SystemZ/vec-const-12.ll +++ /dev/null @@ -1,169 +0,0 @@ -; Test vector replicates, v2f64 version. -; -; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - -; Test a byte-granularity replicate with the lowest useful value. -define <2 x double> @f1() { -; CHECK-LABEL: f1: -; CHECK: vrepib %v24, 1 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a byte-granularity replicate with an arbitrary value. -define <2 x double> @f2() { -; CHECK-LABEL: f2: -; CHECK: vrepib %v24, -55 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a byte-granularity replicate with the highest useful value. -define <2 x double> @f3() { -; CHECK-LABEL: f3: -; CHECK: vrepib %v24, -2 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a halfword-granularity replicate with the lowest useful value. -define <2 x double> @f4() { -; CHECK-LABEL: f4: -; CHECK: vrepih %v24, 1 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a halfword-granularity replicate with an arbitrary value. -define <2 x double> @f5() { -; CHECK-LABEL: f5: -; CHECK: vrepih %v24, 25650 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a halfword-granularity replicate with the highest useful value. -define <2 x double> @f6() { -; CHECK-LABEL: f6: -; CHECK: vrepih %v24, -2 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a word-granularity replicate with the lowest useful positive value. -define <2 x double> @f7() { -; CHECK-LABEL: f7: -; CHECK: vrepif %v24, 1 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a word-granularity replicate with the highest in-range value. -define <2 x double> @f8() { -; CHECK-LABEL: f8: -; CHECK: vrepif %v24, 32767 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a word-granularity replicate with the next highest value. -; This cannot use VREPIF. -define <2 x double> @f9() { -; CHECK-LABEL: f9: -; CHECK-NOT: vrepif -; CHECK: br %r14 - ret <2 x double> -} - -; Test a word-granularity replicate with the lowest in-range value. -define <2 x double> @f10() { -; CHECK-LABEL: f10: -; CHECK: vrepif %v24, -32768 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a word-granularity replicate with the next lowest value. -; This cannot use VREPIF. -define <2 x double> @f11() { -; CHECK-LABEL: f11: -; CHECK-NOT: vrepif -; CHECK: br %r14 - ret <2 x double> -} - -; Test a word-granularity replicate with the highest useful negative value. -define <2 x double> @f12() { -; CHECK-LABEL: f12: -; CHECK: vrepif %v24, -2 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a doubleword-granularity replicate with the lowest useful positive -; value. -define <2 x double> @f13() { -; CHECK-LABEL: f13: -; CHECK: vrepig %v24, 1 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a doubleword-granularity replicate with the highest in-range value. -define <2 x double> @f14() { -; CHECK-LABEL: f14: -; CHECK: vrepig %v24, 32767 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a doubleword-granularity replicate with the next highest value. -; This cannot use VREPIG. -define <2 x double> @f15() { -; CHECK-LABEL: f15: -; CHECK-NOT: vrepig -; CHECK: br %r14 - ret <2 x double> -} - -; Test a doubleword-granularity replicate with the lowest in-range value. -define <2 x double> @f16() { -; CHECK-LABEL: f16: -; CHECK: vrepig %v24, -32768 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a doubleword-granularity replicate with the next lowest value. -; This cannot use VREPIG. -define <2 x double> @f17() { -; CHECK-LABEL: f17: -; CHECK-NOT: vrepig -; CHECK: br %r14 - ret <2 x double> -} - -; Test a doubleword-granularity replicate with the highest useful negative -; value. -define <2 x double> @f18() { -; CHECK-LABEL: f18: -; CHECK: vrepig %v24, -2 -; CHECK: br %r14 - ret <2 x double> -} - -; Repeat f14 with undefs optimistically treated as 32767. -define <2 x double> @f19() { -; CHECK-LABEL: f19: -; CHECK: vrepig %v24, 32767 -; CHECK: br %r14 - ret <2 x double> -} - -; Repeat f18 with undefs optimistically treated as -2. -define <2 x double> @f20() { -; CHECK-LABEL: f20: -; CHECK: vrepig %v24, -2 -; CHECK: br %r14 - ret <2 x double> -} Index: test/CodeGen/SystemZ/vec-const-18.ll =================================================================== --- test/CodeGen/SystemZ/vec-const-18.ll +++ test/CodeGen/SystemZ/vec-const-18.ll @@ -2,83 +2,42 @@ ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -; Test a word-granularity replicate with the lowest value that cannot use -; VREPIF. -define <2 x double> @f1() { -; CHECK-LABEL: f1: -; CHECK: vgmf %v24, 16, 16 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a word-granularity replicate that has the lower 17 bits set. -define <2 x double> @f2() { -; CHECK-LABEL: f2: -; CHECK: vgmf %v24, 15, 31 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a word-granularity replicate that has the upper 15 bits set. -define <2 x double> @f3() { -; CHECK-LABEL: f3: -; CHECK: vgmf %v24, 0, 14 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a word-granularity replicate that has middle bits set. -define <2 x double> @f4() { -; CHECK-LABEL: f4: -; CHECK: vgmf %v24, 2, 11 -; CHECK: br %r14 - ret <2 x double> -} - -; Test a word-granularity replicate with a wrap-around mask. -define <2 x double> @f5() { -; CHECK-LABEL: f5: -; CHECK: vgmf %v24, 17, 15 -; CHECK: br %r14 - ret <2 x double> -} - ; Test a doubleword-granularity replicate with the lowest value that cannot ; use VREPIG. -define <2 x double> @f6() { -; CHECK-LABEL: f6: +define <2 x double> @f1() { +; CHECK-LABEL: f1: ; CHECK: vgmg %v24, 48, 48 ; CHECK: br %r14 ret <2 x double> } ; Test a doubleword-granularity replicate that has the lower 22 bits set. -define <2 x double> @f7() { -; CHECK-LABEL: f7: +define <2 x double> @f2() { +; CHECK-LABEL: f2: ; CHECK: vgmg %v24, 42, 63 ; CHECK: br %r14 ret <2 x double> } ; Test a doubleword-granularity replicate that has the upper 45 bits set. -define <2 x double> @f8() { -; CHECK-LABEL: f8: +define <2 x double> @f3() { +; CHECK-LABEL: f3: ; CHECK: vgmg %v24, 0, 44 ; CHECK: br %r14 ret <2 x double> } ; Test a doubleword-granularity replicate that has middle bits set. -define <2 x double> @f9() { -; CHECK-LABEL: f9: +define <2 x double> @f4() { +; CHECK-LABEL: f4: ; CHECK: vgmg %v24, 2, 11 ; CHECK: br %r14 ret <2 x double> } ; Test a doubleword-granularity replicate with a wrap-around mask. -define <2 x double> @f10() { -; CHECK-LABEL: f10: +define <2 x double> @f5() { +; CHECK-LABEL: f5: ; CHECK: vgmg %v24, 10, 0 ; CHECK: br %r14 ret <2 x double>