diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -39,6 +39,10 @@
   CALL,            // A call instruction.
   RET_FLAG,        // Return with a flag operand.
   GLOBAL_BASE_REG, // Global base reg for PIC.
+
+  // VVP_* nodes.
+#define ADD_VVP_OP(VVP_NAME) VVP_NAME,
+#include "VVPNodes.inc"
 };
 }
@@ -120,6 +124,10 @@
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   /// } Custom Lower
 
+  /// VVP Lowering {
+  SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const;
+  /// } VVP Lowering
+
   /// Custom DAGCombine {
   SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -254,8 +254,16 @@
 }
 
 void VETargetLowering::initVPUActions() {
-  for (MVT LegalVecVT : AllVectorVTs)
+  for (MVT LegalVecVT : AllVectorVTs) {
     setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
+    // Translate all vector instructions with legal element types to VVP_*
+    // nodes.
+    // TODO: We will custom-widen into VVP_* nodes in the future. While we are
+    // building the infrastructure for this, we only do this for legal vector
+    // VTs.
+#define REGISTER_VVP_OP(VVP_NAME, ISD_NAME)                                    \
+  setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
+#include "VVPNodes.inc"
+  }
 }
 
 SDValue
@@ -871,6 +879,10 @@
     TARGET_NODE_CASE(VEC_BROADCAST)
     TARGET_NODE_CASE(RET_FLAG)
     TARGET_NODE_CASE(GLOBAL_BASE_REG)
+
+    // Register the VVP_* SDNodes.
+#define ADD_VVP_OP(VVP_NAME) TARGET_NODE_CASE(VVP_NAME)
+#include "VVPNodes.inc"
   }
 #undef TARGET_NODE_CASE
   return nullptr;
@@ -1428,6 +1440,10 @@
     return lowerVASTART(Op, DAG);
   case ISD::VAARG:
     return lowerVAARG(Op, DAG);
+
+#define REGISTER_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
+#include "VVPNodes.inc"
+    return lowerToVVP(Op, DAG);
   }
 }
 /// } Custom Lower
@@ -1654,3 +1670,53 @@
 
   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
 }
+
+/// \returns the VVP_* SDNode opcode corresponding to \p OC.
+static Optional<unsigned> getVVPOpcode(unsigned OC) {
+  switch (OC) {
+#define REGISTER_VVP_OP(VVPNAME, SDNAME)                                       \
+  case VEISD::VVPNAME:                                                         \
+  case ISD::SDNAME:                                                            \
+    return VEISD::VVPNAME;
+#include "VVPNodes.inc"
+  }
+  return None;
+}
+
+SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
+  // Can we represent this as a VVP node?
+  auto OCOpt = getVVPOpcode(Op->getOpcode());
+  if (!OCOpt.hasValue())
+    return SDValue();
+  unsigned VVPOC = OCOpt.getValue();
+
+  // The representative and legalized vector type of this operation.
+  EVT OpVecVT = Op.getValueType();
+  EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
+
+  // Materialize the VL parameter.
+  SDLoc DL(Op);
+  SDValue AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
+  MVT MaskVT = MVT::v256i1;
+  SDValue ConstTrue = DAG.getConstant(-1, DL, MVT::i32);
+  SDValue Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
+                             ConstTrue); // emit a VEISD::VEC_BROADCAST here.
+
+  // Categories we are interested in.
+  bool IsBinaryOp = false;
+
+  switch (VVPOC) {
+#define REGISTER_BINARY_VVP_OP(VVPNAME, ...)                                   \
+  case VEISD::VVPNAME:                                                         \
+    IsBinaryOp = true;                                                         \
+    break;
+#include "VVPNodes.inc"
+  }
+
+  if (IsBinaryOp) {
+    assert(LegalVecVT.isSimple());
+    return DAG.getNode(VVPOC, DL, LegalVecVT, Op->getOperand(0),
+                       Op->getOperand(1), Mask, AVL);
+  }
+  llvm_unreachable("lowerToVVP called for unexpected SDNode.");
+}
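
For reference, the `#include "VVPNodes.inc"` sites above are X-macro expansion
points: each include site defines only the hook macros it needs, and the node
list stamps out one entry per registered node. With VVP_ADD as the only node
registered so far, the main sites in this file pair preprocess to roughly the
following (an illustrative sketch, not part of the patch):

  // NodeType enum (VEISelLowering.h):
  GLOBAL_BASE_REG, // Global base reg for PIC.
  VVP_ADD,         // from ADD_VVP_OP(VVP_ADD)

  // Switch in VETargetLowering::LowerOperation:
  case ISD::ADD: // from REGISTER_BINARY_VVP_OP(VVP_ADD, ADD)
    return lowerToVVP(Op, DAG);

  // Switch in getVVPOpcode:
  case VEISD::VVP_ADD: // from REGISTER_VVP_OP(VVP_ADD, ADD)
  case ISD::ADD:
    return VEISD::VVP_ADD;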
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -245,6 +245,8 @@
                                             == 0; }]>;
 def fplozero : PatLeaf<(fpimm), [{ return (getFpImmVal(N) & 0xffffffff)
                                            == 0; }]>;
+def True : PatLeaf<(imm), [{ return N->getSExtValue() != 0; }]>;
+
 def CCSIOp : PatLeaf<(cond), [{
   switch (N->get()) {
@@ -2219,6 +2221,19 @@
 def : Pat<(i64 (and i64:$val, 0xffffffff)),
           (ANDrm $val, !add(32, 64))>;
 
+//===----------------------------------------------------------------------===//
+// Vector Instruction Pattern Stuff
+//===----------------------------------------------------------------------===//
+
+// Custom intermediate ISDs.
+class IsVLVT<int OpIdx> : SDTCisVT<OpIdx, i32>;
+def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2, [SDTCisVec<0>, IsVLVT<2>]>>;
+
+// Whether this is an all-true mask (assuming the undef bits above VL are all-true).
+def true_mask : PatLeaf<(vec_broadcast (i32 True), (i32 srcvalue))>;
+// Match any broadcast (ignoring VL).
+def any_broadcast : PatFrag<(ops node:$sx), (vec_broadcast node:$sx, (i32 srcvalue))>;
+
 // Vector instructions.
 include "VEInstrVec.td"
@@ -2227,3 +2242,6 @@
 
 // Patterns and intermediate SD nodes (VEC_*).
 include "VEInstrPatternsVec.td"
+
+// Patterns and intermediate SD nodes (VVP_*).
+include "VVPInstrPatternsVec.td"
diff --git a/llvm/lib/Target/VE/VEInstrPatternsVec.td b/llvm/lib/Target/VE/VEInstrPatternsVec.td
--- a/llvm/lib/Target/VE/VEInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VEInstrPatternsVec.td
@@ -15,10 +15,6 @@
 // Instruction format superclass
 //===----------------------------------------------------------------------===//
 
-// Custom intermediate ISDs.
-class IsVLVT<int OpIdx> : SDTCisVT<OpIdx, i32>;
-def vec_broadcast : SDNode<"VEISD::VEC_BROADCAST", SDTypeProfile<1, 2, [SDTCisVec<0>, IsVLVT<2>]>>;
-
 multiclass vbrd_elem32<ValueType v32, ValueType s32, SDPatternOperator ImmOp,
                        SDNodeXForm ImmCast, OutPatFrag SuperRegCast> {
   // VBRDil
   def : Pat<(v32 (vec_broadcast (s32 ImmOp:$sy), i32:$vl)),
diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -0,0 +1,38 @@
+//===-------------- VVPInstrInfo.td - VVP_* SDNode patterns --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the VE Vector Predicated SDNodes (VVP SDNodes). VVP
+// SDNodes are an intermediate isel layer between the vector SDNodes emitted by
+// LLVM and the actual VE vector instructions. For example:
+//
+//   ADD(x,y)  -->  VVP_ADD(x,y,mask,evl)  -->  VADDSWSXrvml(x,y,mask,evl)
+//      ^                    ^                           ^
+//  The standard    The VVP layer SDNode.      The VE vector instruction.
+//  SDNode.
+//
+// TODO: Explain how VVP nodes relate to VP SDNodes once VP ISel is upstream.
+//
+//===----------------------------------------------------------------------===//
+
+// Binary Operators {
+
+// BinaryOp(x,y,mask,vl)
+def SDTIntBinOpVVP : SDTypeProfile<1, 4, [ // vp_add, vp_and, etc.
+  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>,
+  SDTCisSameNumEltsAs<0, 3>, IsVLVT<4>
+]>;
+
+// Binary operator commutative pattern.
+class vvp_commutative<SDNode RootOp> :
+    PatFrags<(ops node:$lhs, node:$rhs, node:$mask, node:$vlen),
+             [(RootOp node:$lhs, node:$rhs, node:$mask, node:$vlen),
+              (RootOp node:$rhs, node:$lhs, node:$mask, node:$vlen)]>;
+
+// VVP node definitions.
+def vvp_add : SDNode<"VEISD::VVP_ADD", SDTIntBinOpVVP>;
+def c_vvp_add : vvp_commutative<vvp_add>;
+
+// } Binary Operators
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -0,0 +1,47 @@
+//===----------- VVPInstrPatternsVec.td - VVP_* SDNode patterns ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes how VVP_* SDNodes are lowered to machine instructions.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// VVP SDNode definitions.
+//
+//===----------------------------------------------------------------------===//
+include "VVPInstrInfo.td"
+
+multiclass VectorBinaryArith<
+    SDPatternOperator OpNode,
+    ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
+    string OpBaseName,
+    SDPatternOperator ImmOp, SDNodeXForm ImmCast> {
+  // No mask.
+  def : Pat<(OpNode (any_broadcast (ScalarVT ImmOp:$sx)), DataVT:$vy,
+                (MaskVT true_mask), i32:$avl),
+            (!cast<Instruction>(OpBaseName#"ivl") (ImmCast $sx), $vy, $avl)>;
+  def : Pat<(OpNode (any_broadcast ScalarVT:$sx), DataVT:$vy,
+                (MaskVT true_mask), i32:$avl),
+            (!cast<Instruction>(OpBaseName#"rvl") ScalarVT:$sx, $vy, $avl)>;
+  def : Pat<(OpNode DataVT:$vx, DataVT:$vy, (MaskVT true_mask), i32:$avl),
+            (!cast<Instruction>(OpBaseName#"vvl") $vx, $vy, $avl)>;
+
+  // Mask.
+  def : Pat<(OpNode (any_broadcast ScalarVT:$sx), DataVT:$vy, MaskVT:$mask,
+                i32:$avl),
+            (!cast<Instruction>(OpBaseName#"rvml") ScalarVT:$sx, $vy, $mask,
+                $avl)>;
+  def : Pat<(OpNode DataVT:$vx, DataVT:$vy, MaskVT:$mask, i32:$avl),
+            (!cast<Instruction>(OpBaseName#"vvml") $vx, $vy, $mask, $avl)>;
+
+  // TODO: Immediate variants.
+  // TODO: Fold vvp_select into passthru.
+}
+
+// Expand both the 64-bit and the 32-bit variants (256 elements).
+multiclass VectorBinaryArith_ShortLong<
+    SDPatternOperator OpNode,
+    ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+    ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+  defm : VectorBinaryArith<OpNode,
+                           LongScalarVT, LongDataVT, v256i1, LongOpBaseName,
+                           simm7, LO7>;
+  defm : VectorBinaryArith<OpNode,
+                           ShortScalarVT, ShortDataVT, v256i1, ShortOpBaseName,
+                           simm7, LO7>;
+}
+
+defm : VectorBinaryArith_ShortLong<c_vvp_add,
+                                   i64, v256i64, "VADDSL",
+                                   i32, v256i32, "VADDSWSX">;
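
To connect the C++ lowering to these patterns: for a plain `add <256 x i32>
%x, %y`, lowerToVVP materializes an all-ones mask and an AVL of 256, so the
unmasked "vvl" alternative above is the one that selects, yielding
VADDSWSXvvl. A sketch of the node isel sees (not part of the patch; assumes
`DAG`, `DL` and the legalized operands `X` and `Y` are in scope):

  SDValue AVL  = DAG.getConstant(256, DL, MVT::i32);
  SDValue Ones = DAG.getConstant(-1, DL, MVT::i32); // any nonzero matches the `True` leaf
  SDValue Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MVT::v256i1, Ones); // matches `true_mask`
  SDValue Sum  = DAG.getNode(VEISD::VVP_ADD, DL, MVT::v256i32, X, Y, Mask, AVL);
  // (vvp_add $vx, $vy, (v256i1 true_mask), i32:$avl)
  //     --> (VADDSWSXvvl $vx, $vy, $avl)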
diff --git a/llvm/lib/Target/VE/VVPNodes.inc b/llvm/lib/Target/VE/VVPNodes.inc
new file mode 100644
--- /dev/null
+++ b/llvm/lib/Target/VE/VVPNodes.inc
@@ -0,0 +1,39 @@
+//===-- VVPNodes.inc - Lists & properties of VE Vector Predication Nodes -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all VVP_* SDNodes and their properties.
+//
+//===----------------------------------------------------------------------===//
+
+/// ADD_VVP_OP(X)
+/// \p X is a new VVP SDNode.
+#ifndef ADD_VVP_OP
+#define ADD_VVP_OP(X)
+#endif
+
+/// REGISTER_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPNAME is a VVP SDNode operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
+#ifndef REGISTER_VVP_OP
+#define REGISTER_VVP_OP(X, Y)
+#endif
+
+/// REGISTER_BINARY_VVP_OP(VVPNAME,SDNAME)
+/// \p VVPNAME is a VVP binary operator.
+/// \p SDNAME is the generic SD opcode corresponding to \p VVPNAME.
+#ifndef REGISTER_BINARY_VVP_OP
+#define REGISTER_BINARY_VVP_OP(X,Y) REGISTER_VVP_OP(X,Y)
+#endif
+
+// Integer arithmetic.
+ADD_VVP_OP(VVP_ADD) REGISTER_BINARY_VVP_OP(VVP_ADD,ADD)
+
+#undef REGISTER_BINARY_VVP_OP
+#undef REGISTER_VVP_OP
+#undef ADD_VVP_OP
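
The hook-defaulting protocol above (undefined hooks expand to nothing, and
REGISTER_BINARY_VVP_OP falls back to REGISTER_VVP_OP so that generic clients
also see binary nodes) can be tried in isolation. Below is a minimal,
self-contained C++ demo of the same pattern; all names are hypothetical
stand-ins, and the node list is inlined as a macro instead of living in an
include file:

  #include <cstdio>

  // Stand-in for VVPNodes.inc: invokes one hook pair per registered node.
  #define VVP_NODE_LIST(ADD_VVP_OP, REGISTER_BINARY_VVP_OP)                    \
    ADD_VVP_OP(VVP_ADD) REGISTER_BINARY_VVP_OP(VVP_ADD, ADD)

  #define NOOP1(X)    // default for clients that ignore ADD_VVP_OP
  #define NOOP2(X, Y) // default for clients that ignore the binary hook

  // Client 1: declare one enum entry per node (only uses ADD_VVP_OP).
  enum NodeType {
    ISD_ADD = 1, // stand-in for ISD::ADD
  #define ADD_OP(NAME) NAME, // stand-in for the VEISD::* entries
    VVP_NODE_LIST(ADD_OP, NOOP2)
  #undef ADD_OP
  };

  // Client 2: map generic opcodes to VVP opcodes (only uses the binary hook).
  int getVVPOpcodeDemo(int OC) {
    switch (OC) {
  #define BIN_OP(VVPNAME, SDNAME)                                              \
    case ISD_##SDNAME:                                                         \
      return VVPNAME;
      VVP_NODE_LIST(NOOP1, BIN_OP)
  #undef BIN_OP
    }
    return -1;
  }

  int main() {
    // Prints the value of the generated VVP_ADD enumerator (2).
    std::printf("VVP opcode for ISD_ADD: %d\n", getVVPOpcodeDemo(ISD_ADD));
    return 0;
  }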
diff --git a/llvm/test/CodeGen/VE/Vector/vec_add.ll b/llvm/test/CodeGen/VE/Vector/vec_add.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vec_add.ll
@@ -0,0 +1,188 @@
+; RUN: llc < %s -mtriple=ve -mattr=+vpu | FileCheck %s
+
+; <256 x i32>
+
+; Function Attrs: nounwind
+define fastcc <256 x i32> @add_vv_v256i32(<256 x i32> %x, <256 x i32> %y) {
+; CHECK-LABEL: add_vv_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.w.sx %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %z = add <256 x i32> %x, %y
+  ret <256 x i32> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i32> @add_sv_v256i32(i32 %x, <256 x i32> %y) {
+; CHECK-LABEL: add_sv_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vadds.w.sx %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %x, i32 0
+  %vx = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i32> %vx, %y
+  ret <256 x i32> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i32> @add_vs_v256i32(<256 x i32> %x, i32 %y) {
+; CHECK-LABEL: add_vs_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    adds.w.sx %s0, %s0, (0)1
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vadds.w.sx %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i32> undef, i32 %y, i32 0
+  %vy = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i32> %x, %vy
+  ret <256 x i32> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i32> @add_iv_v256i32(<256 x i32> %y) {
+; CHECK-LABEL: add_iv_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.w.sx %v0, 1, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 1, i32 0
+  %vx = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i32> %vx, %y
+  ret <256 x i32> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i32> @add_vi_v256i32(<256 x i32> %x) {
+; CHECK-LABEL: add_vi_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.w.sx %v0, 1, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i32> undef, i32 1, i32 0
+  %vy = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i32> %x, %vy
+  ret <256 x i32> %z
+}
+
+; <256 x i64>
+
+; Function Attrs: nounwind
+define fastcc <256 x i64> @add_vv_v256i64(<256 x i64> %x, <256 x i64> %y) {
+; CHECK-LABEL: add_vv_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.l %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %z = add <256 x i64> %x, %y
+  ret <256 x i64> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i64> @add_sv_v256i64(i64 %x, <256 x i64> %y) {
+; CHECK-LABEL: add_sv_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vadds.l %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %x, i32 0
+  %vx = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i64> %vx, %y
+  ret <256 x i64> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i64> @add_vs_v256i64(<256 x i64> %x, i64 %y) {
+; CHECK-LABEL: add_vs_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s1, 256
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vadds.l %v0, %s0, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i64> undef, i64 %y, i32 0
+  %vy = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i64> %x, %vy
+  ret <256 x i64> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i64> @add_iv_v256i64(<256 x i64> %y) {
+; CHECK-LABEL: add_iv_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.l %v0, 1, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 1, i32 0
+  %vx = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i64> %vx, %y
+  ret <256 x i64> %z
+}
+
+; Function Attrs: nounwind
+define fastcc <256 x i64> @add_vi_v256i64(<256 x i64> %x) {
+; CHECK-LABEL: add_vi_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.l %v0, 1, %v0
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i64> undef, i64 1, i32 0
+  %vy = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %z = add <256 x i64> %x, %vy
+  ret <256 x i64> %z
+}
+
+; <128 x i64>
+; We expect this to be widened.
+
+; Function Attrs: nounwind
+define fastcc <128 x i64> @add_vv_v128i64(<128 x i64> %x, <128 x i64> %y) {
+; CHECK-LABEL: add_vv_v128i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.l %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %z = add <128 x i64> %x, %y
+  ret <128 x i64> %z
+}
+
+; <256 x i16>
+; We expect promotion.
+
+; Function Attrs: nounwind
+define fastcc <256 x i16> @add_vv_v256i16(<256 x i16> %x, <256 x i16> %y) {
+; CHECK-LABEL: add_vv_v256i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lea %s0, 256
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.w.sx %v0, %v0, %v1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %z = add <256 x i16> %x, %y
+  ret <256 x i16> %z
+}
+
+; <128 x i16>
+; We expect this to be scalarized (for now).
+
+; Function Attrs: nounwind
+define fastcc <128 x i16> @add_vv_v128i16(<128 x i16> %x, <128 x i16> %y) {
+; CHECK-LABEL: add_vv_v128i16:
+; CHECK:       # %bb.0:
+; CHECK-NOT:     vadd
+  %z = add <128 x i16> %x, %y
+  ret <128 x i16> %z
+}