diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h
--- a/llvm/lib/Target/VE/VECustomDAG.h
+++ b/llvm/lib/Target/VE/VECustomDAG.h
@@ -29,6 +29,8 @@
 bool isMaskType(EVT SomeVT);
 
+bool isMaskArithmetic(SDValue Op);
+
 bool isVVPOrVEC(unsigned);
 
 bool maySafelyIgnoreMask(SDValue Op);
 
@@ -86,6 +88,11 @@
 // Whether this type belongs to a packed mask or vector register.
 Packing getTypePacking(EVT);
 
+enum class PackElem : int8_t {
+  Lo = 0, // Integer (63, 32]
+  Hi = 1  // Float (32, 0]
+};
+
 class VECustomDAG {
   SelectionDAG &DAG;
   SDLoc DL;
@@ -127,6 +134,11 @@
   SDValue getUNDEF(EVT VT) const { return DAG.getUNDEF(VT); }
   /// } getNode
 
+  /// Packing {
+  SDValue getUnpack(EVT DestVT, SDValue Vec, PackElem Part, SDValue AVL);
+  SDValue getPack(EVT DestVT, SDValue LoVec, SDValue HiVec, SDValue AVL);
+  /// } Packing
+
   SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget = false,
                       bool IsOpaque = false) const;
 
diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp
--- a/llvm/lib/Target/VE/VECustomDAG.cpp
+++ b/llvm/lib/Target/VE/VECustomDAG.cpp
@@ -41,6 +41,17 @@
   return SomeVT.getVectorElementType() == MVT::i1;
 }
 
+bool isMaskArithmetic(SDValue Op) {
+  switch (Op.getOpcode()) {
+  default:
+    return false;
+  case ISD::AND:
+  case ISD::XOR:
+  case ISD::OR:
+    return isMaskType(Op.getValueType());
+  }
+}
+
 /// \returns the VVP_* SDNode opcode corresponsing to \p OC.
 Optional<unsigned> getVVPOpcode(unsigned Opcode) {
   switch (Opcode) {
@@ -206,4 +217,18 @@
   return getNode(VEISD::LEGALAVL, AVL.getValueType(), AVL);
 }
 
+SDValue VECustomDAG::getUnpack(EVT DestVT, SDValue Vec, PackElem Part,
+                               SDValue AVL) {
+  // TODO: Peek through VEC_PACK and VEC_BROADCAST(REPL_ ..) operands.
+  unsigned OC =
+      (Part == PackElem::Lo) ? VEISD::VEC_UNPACK_LO : VEISD::VEC_UNPACK_HI;
+  return DAG.getNode(OC, DL, DestVT, Vec, AVL);
+}
+
+SDValue VECustomDAG::getPack(EVT DestVT, SDValue LoVec, SDValue HiVec,
+                             SDValue AVL) {
+  // TODO: Peek through VEC_UNPACK_LO|HI operands.
+  return DAG.getNode(VEISD::VEC_PACK, DL, DestVT, LoVec, HiVec, AVL);
+}
+
 } // namespace llvm
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -38,8 +38,14 @@
   MEMBARRIER,    // Compiler barrier only; generate a no-op.
   RET_FLAG,      // Return with a flag operand.
   TS1AM,         // A TS1AM instruction used for 1/2 bytes swap.
-  VEC_BROADCAST, // A vector broadcast instruction.
-                 //    0: scalar value, 1: VL
+  VEC_UNPACK_LO, // unpack the lo v256 slice of a packed v512 vector.
+  VEC_UNPACK_HI, // unpack the hi v256 slice of a packed v512 vector.
+                 //    0: v512 vector, 1: AVL
+  VEC_PACK,      // pack a lo and a hi vector into one v512 vector
+                 //    0: v256 lo vector, 1: v256 hi vector, 2: AVL
+
+  VEC_BROADCAST, // A vector broadcast instruction.
+                 //    0: scalar value, 1: VL
   REPL_I32,
   REPL_F32, // Replicate subregister to other half.
@@ -182,6 +188,7 @@
   SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
   SDValue legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const;
   SDValue legalizePackedAVL(SDValue Op, VECustomDAG &CDAG) const;
+  SDValue splitMaskArithmetic(SDValue Op, SelectionDAG &DAG) const;
   /// } VVPLowering
 
   /// Custom DAGCombine {
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -299,6 +299,9 @@
   for (MVT LegalMaskVT : AllMaskVTs)
     setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
 
+  for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
+    setOperationAction(Opc, MVT::v512i1, Custom);
+
   for (MVT LegalVecVT : AllVectorVTs) {
     setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
     setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
@@ -903,6 +906,9 @@
     TARGET_NODE_CASE(MEMBARRIER)
     TARGET_NODE_CASE(RET_FLAG)
     TARGET_NODE_CASE(TS1AM)
+    TARGET_NODE_CASE(VEC_UNPACK_LO)
+    TARGET_NODE_CASE(VEC_UNPACK_HI)
+    TARGET_NODE_CASE(VEC_PACK)
     TARGET_NODE_CASE(VEC_BROADCAST)
     TARGET_NODE_CASE(REPL_I32)
     TARGET_NODE_CASE(REPL_F32)
@@ -1746,6 +1752,8 @@
   // Translate into a VEC_*/VVP_* layer operation.
 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
 #include "VVPNodes.def"
+    if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
+      return splitMaskArithmetic(Op, DAG);
     return lowerToVVP(Op, DAG);
   }
 }
@@ -2690,6 +2698,23 @@
   return true;
 }
 
+SDValue VETargetLowering::splitMaskArithmetic(SDValue Op,
+                                              SelectionDAG &DAG) const {
+  VECustomDAG CDAG(DAG, Op);
+  SDValue AVL =
+      CDAG.getConstant(Op.getValueType().getVectorNumElements(), MVT::i32);
+  SDValue A = Op->getOperand(0);
+  SDValue B = Op->getOperand(1);
+  SDValue LoA = CDAG.getUnpack(MVT::v256i1, A, PackElem::Lo, AVL);
+  SDValue HiA = CDAG.getUnpack(MVT::v256i1, A, PackElem::Hi, AVL);
+  SDValue LoB = CDAG.getUnpack(MVT::v256i1, B, PackElem::Lo, AVL);
+  SDValue HiB = CDAG.getUnpack(MVT::v256i1, B, PackElem::Hi, AVL);
+  unsigned Opc = Op.getOpcode();
+  auto LoRes = CDAG.getNode(Opc, MVT::v256i1, {LoA, LoB});
+  auto HiRes = CDAG.getNode(Opc, MVT::v256i1, {HiA, HiB});
+  return CDAG.getPack(MVT::v512i1, LoRes, HiRes, AVL);
+}
+
 SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
   // Can we represent this as a VVP node.
   const unsigned Opcode = Op->getOpcode();
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -2293,6 +2293,18 @@
 def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2,
                            [SDTCisVec<0>, IsVLVT<2>]>>;
 
+///// Packed mode Support /////
+// unpack the lo part of this vector
+def vec_unpack_lo : SDNode<"VEISD::VEC_UNPACK_LO", SDTypeProfile<1, 2,
+                           [SDTCisVec<0>, SDTCisVec<1>, IsVLVT<2>]>>;
+// unpack the hi part of this vector
+def vec_unpack_hi : SDNode<"VEISD::VEC_UNPACK_HI", SDTypeProfile<1, 2,
+                           [SDTCisVec<0>, SDTCisVec<1>, IsVLVT<2>]>>;
+// re-pack v256i32, v256f32 back into one v512.32
+def vec_pack : SDNode<"VEISD::VEC_PACK", SDTypeProfile<1, 3,
+                      [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>,
+                       SDTCisSameNumEltsAs<1,2>, IsVLVT<3>]>>;
+
 // replicate lower 32bit to upper 32bit (f32 scalar replication).
 def repl_f32 : SDNode<"VEISD::REPL_F32", SDTypeProfile<1, 1,
diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
--- a/llvm/lib/Target/VE/VEInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -112,3 +112,16 @@
 def: Mask_Binary;
 def: Mask_Binary;
 def: Mask_Binary;
+
+///// Packing support /////
+
+// v256i1 <> v512i1
+def : Pat<(v256i1 (vec_unpack_lo v512i1:$vm, (i32 srcvalue))),
+          (EXTRACT_SUBREG $vm, sub_vm_odd)>;
+def : Pat<(v256i1 (vec_unpack_hi v512i1:$vm, (i32 srcvalue))),
+          (EXTRACT_SUBREG $vm, sub_vm_even)>;
+def : Pat<(v512i1 (vec_pack v256i1:$vlo, v256i1:$vhi, (i32 srcvalue))),
+          (INSERT_SUBREG (INSERT_SUBREG
+              (v512i1 (IMPLICIT_DEF)),
+              $vlo, sub_vm_odd),
+          $vhi, sub_vm_even)>;
diff --git a/llvm/test/CodeGen/VE/Packed/mask_binary.ll b/llvm/test/CodeGen/VE/Packed/mask_binary.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Packed/mask_binary.ll
@@ -0,0 +1,42 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+; Function Attrs: nounwind
+define fastcc <512 x i1> @and_mm_v512i1(<512 x i1> %x, <512 x i1> %y) {
+; CHECK-LABEL: and_mm_v512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    andm %vm6, %vm2, %vm4
+; CHECK-NEXT:    andm %vm7, %vm3, %vm5
+; CHECK-NEXT:    andm %vm2, %vm0, %vm6
+; CHECK-NEXT:    andm %vm3, %vm0, %vm7
+; CHECK-NEXT:    b.l.t (, %s10)
+  %z = and <512 x i1> %x, %y
+  ret <512 x i1> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <512 x i1> @or_mm_v512i1(<512 x i1> %x, <512 x i1> %y) {
+; CHECK-LABEL: or_mm_v512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    orm %vm6, %vm2, %vm4
+; CHECK-NEXT:    orm %vm7, %vm3, %vm5
+; CHECK-NEXT:    andm %vm2, %vm0, %vm6
+; CHECK-NEXT:    andm %vm3, %vm0, %vm7
+; CHECK-NEXT:    b.l.t (, %s10)
+  %z = or <512 x i1> %x, %y
+  ret <512 x i1> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <512 x i1> @xor_mm_v512i1(<512 x i1> %x, <512 x i1> %y) {
+; CHECK-LABEL: xor_mm_v512i1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    xorm %vm6, %vm2, %vm4
+; CHECK-NEXT:    xorm %vm7, %vm3, %vm5
+; CHECK-NEXT:    andm %vm2, %vm0, %vm6
+; CHECK-NEXT:    andm %vm3, %vm0, %vm7
+; CHECK-NEXT:    b.l.t (, %s10)
+  %z = xor <512 x i1> %x, %y
+  ret <512 x i1> %z
+}
+