Index: lib/Target/SystemZ/SystemZISelDAGToDAG.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ lib/Target/SystemZ/SystemZISelDAGToDAG.cpp @@ -304,6 +304,11 @@ void splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, uint64_t UpperVal, uint64_t LowerVal); + typedef SystemZTargetLowering::SystemZVectorConstantInfo + SystemZVectorConstantInfo; + void loadVectorConstant(const SystemZVectorConstantInfo &VCI, + SDNode *Node); + // Try to use gather instruction Opcode to implement vector insertion N. bool tryGather(SDNode *N, unsigned Opcode); @@ -1132,6 +1137,49 @@ SelectCode(Or.getNode()); } +void SystemZDAGToDAGISel:: +loadVectorConstant(const SystemZVectorConstantInfo &VCI, SDNode *Node) { + SDLoc DL(Node); + EVT VT = Node->getValueType(0); + SDValue Op; + SDValue BitCast; + + if (VCI.Opcode == SystemZ::VGBM) + Op = SDValue(CurDAG->getMachineNode(SystemZ::VGBM, DL, MVT::v16i8, + CurDAG->getTargetConstant(VCI.Mask, DL, MVT::i32)), 0); + else if (VCI.Opcode == SystemZ::VREPI) + Op = CurDAG->getNode(SystemZISD::REPLICATE, DL, VCI.VecVT, + CurDAG->getConstant(VCI.ReplicatedImm, DL, MVT::i32, false, + true /*isOpaque*/)); + else { + assert(VCI.Opcode == SystemZ::VGM && "Unhandled opcode."); + Op = CurDAG->getNode( + SystemZISD::ROTATE_MASK, DL, VCI.VecVT, + CurDAG->getConstant(VCI.Start, DL, MVT::i32, false, true /*isOpaque*/), + CurDAG->getConstant(VCI.End, DL, MVT::i32, false, true /*isOpaque*/)); + } + + if (!VT.isVector()) { + unsigned SubRegIdx = (VT.getSizeInBits() == 32 ? 
SystemZ::subreg_h32 + : SystemZ::subreg_h64); + SDNode *SubReg = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, Op) + .getNode(); + ReplaceNode(Node, SubReg); + if (!Op.isMachineOpcode()) + SelectCode(Op.getNode()); + return; + } + + BitCast = CurDAG->getNode(ISD::BITCAST, DL, VT, Op); + ReplaceNode(Node, BitCast.getNode()); + if (!BitCast.isMachineOpcode()) + SelectCode(BitCast.getNode()); + if (Op != BitCast && !Op.isMachineOpcode()) { + assert(!Op.use_empty() && "Expected bitcasted SDValue to remain in DAG"); + SelectCode(Op.getNode()); + } +} + bool SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { SDValue ElemV = N->getOperand(2); auto *ElemN = dyn_cast(ElemV); @@ -1426,6 +1474,9 @@ return; } + const SystemZTargetLowering *TLI = + static_cast(getTargetLowering()); + unsigned Opcode = Node->getOpcode(); switch (Opcode) { case ISD::OR: @@ -1529,13 +1580,9 @@ case ISD::BUILD_VECTOR: { auto *BVN = cast(Node); - SDLoc DL(Node); - EVT VT = Node->getValueType(0); - uint64_t Mask = 0; - if (SystemZTargetLowering::tryBuildVectorByteMask(BVN, Mask)) { - SDNode *Res = CurDAG->getMachineNode(SystemZ::VGBM, DL, VT, - CurDAG->getTargetConstant(Mask, DL, MVT::i32)); - ReplaceNode(Node, Res); + SystemZVectorConstantInfo VCI(BVN); + if (TLI->isVectorConstantLegal(VCI)) { + loadVectorConstant(VCI, Node); return; } break; @@ -1545,23 +1592,10 @@ APFloat Imm = cast(Node)->getValueAPF(); if (Imm.isZero() || Imm.isNegZero()) break; - const SystemZInstrInfo *TII = getInstrInfo(); - EVT VT = Node->getValueType(0); - unsigned Start, End; - unsigned BitWidth = VT.getSizeInBits(); - bool Success = SystemZTargetLowering::analyzeFPImm(Imm, BitWidth, Start, - End, static_cast(TII)); (void)Success; + SystemZVectorConstantInfo VCI(Imm); + bool Success = TLI->isVectorConstantLegal(VCI); (void)Success; assert(Success && "Expected legal FP immediate"); - SDLoc DL(Node); - unsigned Opcode = (BitWidth == 32 ? 
SystemZ::VGMF : SystemZ::VGMG); - SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, - CurDAG->getTargetConstant(Start, DL, MVT::i32), - CurDAG->getTargetConstant(End, DL, MVT::i32)); - unsigned SubRegIdx = (BitWidth == 32 ? SystemZ::subreg_h32 - : SystemZ::subreg_h64); - Res = CurDAG->getTargetExtractSubreg(SubRegIdx, DL, VT, SDValue(Res, 0)) - .getNode(); - ReplaceNode(Node, Res); + loadVectorConstant(VCI, Node); return; } Index: lib/Target/SystemZ/SystemZISelLowering.h =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.h +++ lib/Target/SystemZ/SystemZISelLowering.h @@ -513,9 +513,36 @@ return true; } - static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask); - static bool analyzeFPImm(const APFloat &Imm, unsigned BitWidth, - unsigned &Start, unsigned &End, const SystemZInstrInfo *TII); + struct SystemZVectorConstantInfo { + private: + APFloat FPImm; + BuildVectorSDNode *BVN; + + void init() { Mask = 0; ReplicatedImm = 0; Start = 0; End = 0; Opcode = 0; } + public: + + uint64_t Mask; // VGBM + int64_t ReplicatedImm; // VREPI + unsigned Start; // VGM Start + unsigned End; // VGM End + unsigned Opcode; + MVT VecVT; + SystemZVectorConstantInfo(APFloat f) : FPImm(f), BVN(nullptr) { + init(); + assert(FPImm.bitcastToAPInt().getBitWidth() <= 64 && "Unhandled FP!"); + }; + SystemZVectorConstantInfo(BuildVectorSDNode *b) : FPImm(0.0), + BVN(b) { + init(); + assert(BVN->isConstant() && "Expected a constant BUILD_VECTOR"); + }; + + APInt getIntBits(); + void getSplat(APInt &SplatBits, APInt &SplatUndef, unsigned &SplatBitSize); + }; + + bool isVectorConstantLegal(SystemZVectorConstantInfo &VCI) const; + private: const SystemZSubtarget &Subtarget; Index: lib/Target/SystemZ/SystemZISelLowering.cpp =================================================================== --- lib/Target/SystemZ/SystemZISelLowering.cpp +++ lib/Target/SystemZ/SystemZISelLowering.cpp @@ -577,26 +577,122 @@ return false; } 
- -// Return true if Imm can be generated with a vector instruction, such as VGM. +// Return true if Bits can be generated with a vector instruction, such as +// VGM, VGBM or VREPI. VCI is used to remember the details. bool SystemZTargetLowering:: -analyzeFPImm(const APFloat &Imm, unsigned BitWidth, unsigned &Start, - unsigned &End, const SystemZInstrInfo *TII) { - APInt IntImm = Imm.bitcastToAPInt(); - if (IntImm.getActiveBits() > 64) +isVectorConstantLegal(SystemZVectorConstantInfo &VCI) const { + if (!Subtarget.hasVector()) return false; - // See if this immediate could be generated with VGM. - bool Success = TII->isRxSBGMask(IntImm.getZExtValue(), BitWidth, Start, End); - if (!Success) + // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- + // preferred way of creating all-zero and all-one vectors so give it + // priority over other methods below. + APInt IntBits = VCI.getIntBits(); + unsigned I = 0; + for (; I < SystemZ::VectorBytes; ++I) { + uint64_t Byte = IntBits.lshr(I * 8).trunc(8).getZExtValue(); + if (Byte == 0xff) + VCI.Mask |= 1ULL << I; + else if (Byte != 0) + break; + } + if (I == SystemZ::VectorBytes) { + VCI.Opcode = SystemZ::VGBM; + return true; + } + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + VCI.getSplat(SplatBits, SplatUndef, SplatBitSize); + if (SplatBitSize > 64) return false; - // isRxSBGMask returns the bit numbers for a full 64-bit value, - // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to - // bit numbers for an BitsPerElement value, so that 0 denotes - // 1 << (BitsPerElement-1). 
- Start -= 64 - BitWidth; - End -= 64 - BitWidth; - return true; + + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); + + auto tryValue = [&](uint64_t Value) -> bool { + // Try VECTOR REPLICATE IMMEDIATE + int64_t SignedValue = SignExtend64(Value, SplatBitSize); + if (isInt<16>(SignedValue)) { + VCI.ReplicatedImm = SignedValue; + VCI.Opcode = SystemZ::VREPI; + VCI.VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), + SystemZ::VectorBits / SplatBitSize); + return true; + } + // Try VECTOR GENERATE MASK + if (TII->isRxSBGMask(Value, SplatBitSize, VCI.Start, VCI.End)) { + // isRxSBGMask returns the bit numbers for a full 64-bit value, with 0 + // denoting 1 << 63 and 63 denoting 1. Convert them to bit numbers for + // a SplatBitSize-bit value, so that 0 denotes 1 << (SplatBitSize-1). + VCI.Start -= 64 - SplatBitSize; + VCI.End -= 64 - SplatBitSize; + VCI.Opcode = SystemZ::VGM; + VCI.VecVT = MVT::getVectorVT(MVT::getIntegerVT(SplatBitSize), + SystemZ::VectorBits / SplatBitSize); + return true; + } + return false; + }; + + // First try assuming that any undefined bits above the highest set bit + // and below the lowest set bit are 1s. This increases the likelihood of + // being able to use a sign-extended element value in VECTOR REPLICATE + // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. + uint64_t SplatBitsZ = SplatBits.getZExtValue(); + uint64_t SplatUndefZ = SplatUndef.getZExtValue(); + uint64_t Lower = + (SplatUndefZ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); + uint64_t Upper = + (SplatUndefZ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); + if (tryValue(SplatBitsZ | Upper | Lower)) + return true; + + // Now try assuming that any undefined bits between the first and + // last defined set bits are set. This increases the chances of + // using a non-wraparound mask. 
+ uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; + return tryValue(SplatBitsZ | Middle); +} + +APInt SystemZTargetLowering::SystemZVectorConstantInfo::getIntBits() { + if (BVN == nullptr) // Scalar FP is extracted from the high subreg. + return FPImm.bitcastToAPInt().zext(128).shl( + 128 - FPImm.bitcastToAPInt().getBitWidth()); + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, 128, + true); + return SplatBits; +} + +void SystemZTargetLowering::SystemZVectorConstantInfo:: +getSplat(APInt &SplatBits, APInt &SplatUndef, unsigned &SplatBitSize) { + if (BVN != nullptr) { + bool HasAnyUndefs; + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, + HasAnyUndefs, 8, true); + return; + } + + // Find the splat for the FP Constant. + SplatBits = FPImm.bitcastToAPInt(); + unsigned Width = SplatBits.getBitWidth(); + while (Width > 8) { + unsigned HalfSize = Width / 2; + APInt HighValue = SplatBits.lshr(HalfSize).trunc(HalfSize); + APInt LowValue = SplatBits.trunc(HalfSize); + + // If the two halves do not match, stop here. + if (HighValue != LowValue || 8 > HalfSize) + break; + + SplatBits = HighValue; + Width = HalfSize; + } + SplatUndef = 0; + SplatBitSize = Width; } bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { @@ -604,12 +700,11 @@ if (Imm.isZero() || Imm.isNegZero()) return true; - if (!Subtarget.hasVector()) + if (Imm.bitcastToAPInt().getBitWidth() > 64) return false; - const SystemZInstrInfo *TII = - static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo()); - unsigned Start, End; - return analyzeFPImm(Imm, VT.getSizeInBits(), Start, End, TII); + + SystemZVectorConstantInfo VCI(Imm); + return isVectorConstantLegal(VCI); } bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { @@ -4289,78 +4384,6 @@ return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); } -// Try to represent constant BUILD_VECTOR node BVN using a BYTE MASK style -// mask. Store the mask value in Mask on success. 
-bool SystemZTargetLowering:: -tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { - EVT ElemVT = BVN->getValueType(0).getVectorElementType(); - unsigned BytesPerElement = ElemVT.getStoreSize(); - for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { - SDValue Op = BVN->getOperand(I); - if (!Op.isUndef()) { - uint64_t Value; - if (Op.getOpcode() == ISD::Constant) - Value = cast(Op)->getZExtValue(); - else if (Op.getOpcode() == ISD::ConstantFP) - Value = (cast(Op)->getValueAPF().bitcastToAPInt() - .getZExtValue()); - else - return false; - for (unsigned J = 0; J < BytesPerElement; ++J) { - uint64_t Byte = (Value >> (J * 8)) & 0xff; - if (Byte == 0xff) - Mask |= 1ULL << ((E - I - 1) * BytesPerElement + J); - else if (Byte != 0) - return false; - } - } - } - return true; -} - -// Try to load a vector constant in which BitsPerElement-bit value Value -// is replicated to fill the vector. VT is the type of the resulting -// constant, which may have elements of a different size from BitsPerElement. -// Return the SDValue of the constant on success, otherwise return -// an empty value. -static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, - const SystemZInstrInfo *TII, - const SDLoc &DL, EVT VT, uint64_t Value, - unsigned BitsPerElement) { - // Signed 16-bit values can be replicated using VREPI. - // Mark the constants as opaque or DAGCombiner will convert back to - // BUILD_VECTOR. - int64_t SignedValue = SignExtend64(Value, BitsPerElement); - if (isInt<16>(SignedValue)) { - MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), - SystemZ::VectorBits / BitsPerElement); - SDValue Op = DAG.getNode( - SystemZISD::REPLICATE, DL, VecVT, - DAG.getConstant(SignedValue, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); - } - // See whether rotating the constant left some N places gives a value that - // is one less than a power of 2 (i.e. all zeros followed by all ones). - // If so we can use VGM. 
- unsigned Start, End; - if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) { - // isRxSBGMask returns the bit numbers for a full 64-bit value, - // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to - // bit numbers for an BitsPerElement value, so that 0 denotes - // 1 << (BitsPerElement-1). - Start -= 64 - BitsPerElement; - End -= 64 - BitsPerElement; - MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), - SystemZ::VectorBits / BitsPerElement); - SDValue Op = DAG.getNode( - SystemZISD::ROTATE_MASK, DL, VecVT, - DAG.getConstant(Start, DL, MVT::i32, false, true /*isOpaque*/), - DAG.getConstant(End, DL, MVT::i32, false, true /*isOpaque*/)); - return DAG.getNode(ISD::BITCAST, DL, VT, Op); - } - return SDValue(); -} - // If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually // better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for // the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR @@ -4561,55 +4584,15 @@ SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { - const SystemZInstrInfo *TII = - static_cast(Subtarget.getInstrInfo()); auto *BVN = cast(Op.getNode()); SDLoc DL(Op); EVT VT = Op.getValueType(); if (BVN->isConstant()) { - // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- - // preferred way of creating all-zero and all-one vectors so give it - // priority over other methods below. - uint64_t Mask; - if (ISD::isBuildVectorAllZeros(Op.getNode()) || - ISD::isBuildVectorAllOnes(Op.getNode()) || - (VT.isInteger() && tryBuildVectorByteMask(BVN, Mask))) + SystemZVectorConstantInfo VCI(BVN); + if (isVectorConstantLegal(VCI)) return Op; - // Try using some form of replication. 
- APInt SplatBits, SplatUndef; - unsigned SplatBitSize; - bool HasAnyUndefs; - if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, - 8, true) && - SplatBitSize <= 64) { - // First try assuming that any undefined bits above the highest set bit - // and below the lowest set bit are 1s. This increases the likelihood of - // being able to use a sign-extended element value in VECTOR REPLICATE - // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. - uint64_t SplatBitsZ = SplatBits.getZExtValue(); - uint64_t SplatUndefZ = SplatUndef.getZExtValue(); - uint64_t Lower = (SplatUndefZ - & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); - uint64_t Upper = (SplatUndefZ - & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); - uint64_t Value = SplatBitsZ | Upper | Lower; - SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, - SplatBitSize); - if (Op.getNode()) - return Op; - - // Now try assuming that any undefined bits between the first and - // last defined set bits are set. This increases the chances of - // using a non-wraparound mask. - uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; - Value = SplatBitsZ | Middle; - Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize); - if (Op.getNode()) - return Op; - } - // Fall back to loading it from memory. return SDValue(); } Index: test/CodeGen/SystemZ/vec-const-05.ll =================================================================== --- test/CodeGen/SystemZ/vec-const-05.ll +++ test/CodeGen/SystemZ/vec-const-05.ll @@ -1,28 +1,63 @@ -; Test vector byte masks, v4f32 version. Only all-zero vectors are handled. +; Test vector byte masks, v4f32 version. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test an all-zeros vector. -define <4 x float> @f0() { -; CHECK-LABEL: f0: +define <4 x float> @f1() { +; CHECK-LABEL: f1: ; CHECK: vgbm %v24, 0 ; CHECK: br %r14 ret <4 x float> zeroinitializer } -; Test that undefs are treated as zero. 
-define <4 x float> @f1() { -; CHECK-LABEL: f1: -; CHECK: vgbm %v24, 0 +; Test an all-ones vector. +define <4 x float> @f2() { +; CHECK-LABEL: f2: +; CHECK: vgbm %v24, 65535 ; CHECK: br %r14 - ret <4 x float> + ret <4 x float> +} + +; Test a mixed vector (mask 0xc731). +define <4 x float> @f3() { +; CHECK-LABEL: f3: +; CHECK: vgbm %v24, 50993 +; CHECK: br %r14 + ret <4 x float> +} + +; Test that undefs are treated as zero (mask 0xc031). +define <4 x float> @f4() { +; CHECK-LABEL: f4: +; CHECK: vgbm %v24, 49201 +; CHECK: br %r14 + ret <4 x float> +} + +; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. +define <4 x float> @f5() { +; CHECK-LABEL: f5: +; CHECK-NOT: vgbm +; CHECK: br %r14 + ret <4 x float> } ; Test an all-zeros v2f32 that gets promoted to v4f32. -define <2 x float> @f2() { -; CHECK-LABEL: f2: +define <2 x float> @f6() { +; CHECK-LABEL: f6: ; CHECK: vgbm %v24, 0 ; CHECK: br %r14 ret <2 x float> zeroinitializer } + +; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700). +define <2 x float> @f7() { +; CHECK-LABEL: f7: +; CHECK: vgbm %v24, 50944 +; CHECK: br %r14 + ret <2 x float> +} Index: test/CodeGen/SystemZ/vec-const-06.ll =================================================================== --- test/CodeGen/SystemZ/vec-const-06.ll +++ test/CodeGen/SystemZ/vec-const-06.ll @@ -1,19 +1,43 @@ -; Test vector byte masks, v2f64 version. Only all-zero vectors are handled. +; Test vector byte masks, v2f64 version. ; ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ; Test an all-zeros vector. -define <2 x double> @f0() { -; CHECK-LABEL: f0: +define <2 x double> @f1() { +; CHECK-LABEL: f1: ; CHECK: vgbm %v24, 0 ; CHECK: br %r14 ret <2 x double> zeroinitializer } -; Test that undefs are treated as zero. -define <2 x double> @f1() { -; CHECK-LABEL: f1: -; CHECK: vgbm %v24, 0 +; Test an all-ones vector. 
+define <2 x double> @f2() { +; CHECK-LABEL: f2: +; CHECK: vgbm %v24, 65535 +; CHECK: br %r14 + ret <2 x double> +} + +; Test a mixed vector (mask 0x8c76). +define <2 x double> @f3() { +; CHECK-LABEL: f3: +; CHECK: vgbm %v24, 35958 +; CHECK: br %r14 + ret <2 x double> +} + +; Test that undefs are treated as zero (mask 0x8c00). +define <2 x double> @f4() { +; CHECK-LABEL: f4: +; CHECK: vgbm %v24, 35840 +; CHECK: br %r14 + ret <2 x double> +} + +; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. +define <2 x double> @f5() { +; CHECK-LABEL: f5: +; CHECK-NOT: vgbm ; CHECK: br %r14 - ret <2 x double> + ret <2 x double> } Index: test/CodeGen/SystemZ/vec-const-19.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/vec-const-19.ll @@ -0,0 +1,18 @@ +; Test that a scalar FP constant can be reused from a vector splat constant +; of the same value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + +define void @fun() { +; CHECK-LABEL: fun: +; CHECK: vgmg %v0, 2, 10 +; CHECK-NOT: vgmg %v0, 2, 10 + + %tmp = fadd <2 x double> zeroinitializer, + %tmp1 = fmul <2 x double> %tmp, + store <2 x double> %tmp1, <2 x double>* undef + %tmp2 = load double, double* undef + %tmp3 = fmul double %tmp2, 5.000000e-01 + store double %tmp3, double* undef + ret void +}