[VE] Lower broadcasts into i1 mask vectors

Moves the vector width constants into VE.h, adds the Packing enum and the
getLegalVectorType / getTypePacking / isMaskType helpers, and teaches
VECustomDAG::getBroadcast to handle mask result types: constant splats become
a VEC_BROADCAST of -1 (negated for the all-false case), other scalars are
broadcast as i32 and compared against a zero broadcast with SETNE.
Instruction selection replaces a constant non-zero i1 VEC_BROADCAST with a
copy from VM0 (256 elements) or VMP0 (512 elements).

diff --git a/llvm/lib/Target/VE/VE.h b/llvm/lib/Target/VE/VE.h
--- a/llvm/lib/Target/VE/VE.h
+++ b/llvm/lib/Target/VE/VE.h
@@ -370,5 +370,8 @@
 inline unsigned M0(unsigned Val) { return Val + 64; }
 inline unsigned M1(unsigned Val) { return Val; }
 
+static const unsigned StandardVectorWidth = 256;
+static const unsigned PackedVectorWidth = 512;
+
 } // namespace llvm
 #endif
diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
--- a/llvm/lib/Target/VE/VECustomDAG.h
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -27,6 +27,8 @@
 
 bool isPackedVectorType(EVT SomeVT);
 
+bool isMaskType(EVT SomeVT);
+
 bool isVVPOrVEC(unsigned);
 
 bool maySafelyIgnoreMask(SDValue Op);
@@ -73,6 +75,17 @@
 
 /// } AVL Functions
 
+enum class Packing {
+  Normal = 0, // 256 element standard mode.
+  Dense = 1   // 512 element packed mode.
+};
+
+// Get the vector or mask register type for this packing and element type.
+MVT getLegalVectorType(Packing P, MVT ElemVT);
+
+// Whether this type belongs to a packed mask or vector register.
+Packing getTypePacking(EVT);
+
 class VECustomDAG {
   SelectionDAG &DAG;
   SDLoc DL;
@@ -117,6 +130,8 @@
   SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false,
                       bool IsOpaque = false) const;
 
+  SDValue getConstantMask(Packing Packing, bool AllTrue) const;
+  SDValue getMaskBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const;
   SDValue getBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const;
 
   // Wrap AVL in a LEGALAVL node (unless it is one already).
diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
--- a/llvm/lib/Target/VE/VECustomDAG.cpp
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -19,14 +19,28 @@
 
 namespace llvm {
 
-static const int StandardVectorWidth = 256;
-
 bool isPackedVectorType(EVT SomeVT) {
   if (!SomeVT.isVector())
     return false;
   return SomeVT.getVectorNumElements() > StandardVectorWidth;
 }
 
+MVT getLegalVectorType(Packing P, MVT ElemVT) {
+  return MVT::getVectorVT(ElemVT, P == Packing::Normal ? StandardVectorWidth
+                                                       : PackedVectorWidth);
+}
+
+Packing getTypePacking(EVT VT) {
+  assert(VT.isVector());
+  return isPackedVectorType(VT) ? Packing::Dense : Packing::Normal;
+}
+
+bool isMaskType(EVT SomeVT) {
+  if (!SomeVT.isVector())
+    return false;
+  return SomeVT.getVectorElementType() == MVT::i1;
+}
+
 /// \returns the VVP_* SDNode opcode corresponsing to \p OC.
 Optional<unsigned> getVVPOpcode(unsigned Opcode) {
   switch (Opcode) {
@@ -121,11 +135,55 @@
   return DAG.getConstant(Val, DL, VT, IsTarget, IsOpaque);
 }
 
+SDValue VECustomDAG::getConstantMask(Packing Packing, bool AllTrue) const {
+  auto MaskVT = getLegalVectorType(Packing, MVT::i1);
+
+  // VEISelDAGtoDAG will replace this pattern with the constant-true VM.
+  auto TrueVal = DAG.getConstant(-1, DL, MVT::i32);
+  auto AVL = getConstant(MaskVT.getVectorNumElements(), MVT::i32);
+  auto Res = getNode(VEISD::VEC_BROADCAST, MaskVT, {TrueVal, AVL});
+  if (AllTrue)
+    return Res;
+
+  return DAG.getNOT(DL, Res, Res.getValueType());
+}
+
+SDValue VECustomDAG::getMaskBroadcast(EVT ResultVT, SDValue Scalar,
+                                      SDValue AVL) const {
+  // Constant mask splat.
+  if (auto BcConst = dyn_cast<ConstantSDNode>(Scalar))
+    return getConstantMask(getTypePacking(ResultVT),
+                           BcConst->getSExtValue() != 0);
+
+  // Expand the broadcast to a vector comparison.
+  auto ScalarBoolVT = Scalar.getSimpleValueType();
+  assert(ScalarBoolVT == MVT::i32);
+
+  // Cast to i32 ty.
+  SDValue CmpElem = DAG.getSExtOrTrunc(Scalar, DL, MVT::i32);
+  unsigned ElemCount = ResultVT.getVectorNumElements();
+  MVT CmpVecTy = MVT::getVectorVT(ScalarBoolVT, ElemCount);
+
+  // Broadcast to vector.
+  SDValue BCVec =
+      DAG.getNode(VEISD::VEC_BROADCAST, DL, CmpVecTy, {CmpElem, AVL});
+  SDValue ZeroVec =
+      getBroadcast(CmpVecTy, {DAG.getConstant(0, DL, ScalarBoolVT)}, AVL);
+
+  MVT BoolVecTy = MVT::getVectorVT(MVT::i1, ElemCount);
+
+  // Broadcast(Data) != Broadcast(0)
+  // TODO: Use a VVP operation for this.
+  return DAG.getSetCC(DL, BoolVecTy, BCVec, ZeroVec, ISD::CondCode::SETNE);
+}
+
 SDValue VECustomDAG::getBroadcast(EVT ResultVT, SDValue Scalar,
                                   SDValue AVL) const {
   assert(ResultVT.isVector());
   auto ScaVT = Scalar.getValueType();
-  assert(ScaVT != MVT::i1 && "TODO: Mask broadcasts");
+
+  if (isMaskType(ResultVT))
+    return getMaskBroadcast(ResultVT, Scalar, AVL);
 
   if (isPackedVectorType(ResultVT)) {
     // v512x packed mode broadcast
diff --git a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
--- a/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
+++ b/llvm/lib/Target/VE/VEISelDAGToDAG.cpp
@@ -10,6 +10,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "VE.h"
 #include "VETargetMachine.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
@@ -341,6 +342,36 @@
     ReplaceNode(N, N->getOperand(0).getNode());
     return;
 
+  // Lower (broadcast 1) and (broadcast 0) to VM[P]0
+  case VEISD::VEC_BROADCAST: {
+    MVT SplatResTy = N->getSimpleValueType(0);
+    if (SplatResTy.getVectorElementType() != MVT::i1)
+      break;
+
+    // Constant non-zero broadcast.
+    auto BConst = dyn_cast<ConstantSDNode>(N->getOperand(0));
+    if (!BConst)
+      break;
+    bool BCTrueMask = (BConst->getSExtValue() != 0);
+    if (!BCTrueMask)
+      break;
+
+    // Packed or non-packed.
+    SDValue New;
+    if (SplatResTy.getVectorNumElements() == StandardVectorWidth) {
+      New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(N), VE::VM0,
+                                   MVT::v256i1);
+    } else if (SplatResTy.getVectorNumElements() == PackedVectorWidth) {
+      New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), SDLoc(N), VE::VMP0,
+                                   MVT::v512i1);
+    } else
+      break;
+
+    // Replace.
+    ReplaceNode(N, New.getNode());
+    return;
+  }
+
   case VEISD::GLOBAL_BASE_REG:
     ReplaceNode(N, getGlobalBaseReg());
     return;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -76,6 +76,8 @@
 static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
                                    MVT::v256f32, MVT::v512f32, MVT::v256f64};
 
+static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
+
 static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
 
 void VETargetLowering::initRegisterClasses() {
@@ -294,6 +296,9 @@
 }
 
 void VETargetLowering::initVPUActions() {
+  for (MVT LegalMaskVT : AllMaskVTs)
+    setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
+
   for (MVT LegalVecVT : AllVectorVTs) {
     setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
@@ -1661,7 +1666,7 @@
   if (SDValue ScalarV = getSplatValue(Op.getNode())) {
     unsigned NumEls = ResultVT.getVectorNumElements();
     auto AVL = CDAG.getConstant(NumEls, MVT::i32);
-    return CDAG.getBroadcast(ResultVT, Op.getOperand(0), AVL);
+    return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
   }
 
   // Expand
@@ -2696,9 +2701,9 @@
 
   // The representative and legalized vector type of this operation.
   VECustomDAG CDAG(DAG, Op);
-  MVT MaskVT = MVT::v256i1; // TODO: packed mode.
   EVT OpVecVT = Op.getValueType();
   EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
+  auto Packing = getTypePacking(LegalVecVT.getSimpleVT());
 
   SDValue AVL;
   SDValue Mask;
@@ -2713,8 +2718,7 @@
   } else {
     // Materialize the VL parameter.
     AVL = CDAG.getConstant(OpVecVT.getVectorNumElements(), MVT::i32);
-    SDValue ConstTrue = CDAG.getConstant(1, MVT::i32);
-    Mask = CDAG.getBroadcast(MaskVT, ConstTrue, AVL);
+    Mask = CDAG.getConstantMask(Packing, true);
   }
 
   if (isVVPBinaryOp(VVPOpcode)) {
diff --git a/llvm/test/CodeGen/VE/Vector/mask_broadcast.ll b/llvm/test/CodeGen/VE/Vector/mask_broadcast.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/mask_broadcast.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s
+
+define fastcc <256 x i1> @brd_v256i1_s(i1 %s) {
+; CHECK-LABEL: brd_v256i1_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vbrd %v0, %s0
+; CHECK-NEXT:    vbrd %v1, 0
+; CHECK-NEXT:    vcmpu.w %v0, %v0, %v1
+; CHECK-NEXT:    vfmk.w.ne %vm1, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x i1> undef, i1 %s, i32 0
+  %ret = shufflevector <256 x i1> %val, <256 x i1> undef, <256 x i32> zeroinitializer
+  ret <256 x i1> %ret
+}
+
+define fastcc <256 x i1> @brd_v256i1_zero() {
+; CHECK-LABEL: brd_v256i1_zero:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorm %vm1, %vm0, %vm0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x i1> undef, i1 0, i32 0
+  %ret = shufflevector <256 x i1> %val, <256 x i1> undef, <256 x i32> zeroinitializer
+  ret <256 x i1> %ret
+}
+
+define fastcc <256 x i1> @brd_v256i1_one() {
+; CHECK-LABEL: brd_v256i1_one:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andm %vm1, %vm0, %vm0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %val = insertelement <256 x i1> undef, i1 1, i32 0
+  %ret = shufflevector <256 x i1> %val, <256 x i1> undef, <256 x i32> zeroinitializer
+  ret <256 x i1> %ret
+}