diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -1196,6 +1196,15 @@
 /// For example ISD::AND for ISD::VECREDUCE_AND.
 NodeType getVecReduceBaseOpcode(unsigned VecReduceOpcode);
 
+/// Whether this is a vector-predicated Opcode.
+bool isVPOpcode(unsigned Opcode);
+
+/// The operand position of the vector mask.
+Optional<unsigned> getVPMaskIdx(unsigned Opcode);
+
+/// The operand position of the explicit vector length parameter.
+Optional<unsigned> getVPExplicitVectorLengthIdx(unsigned Opcode);
+
 //===--------------------------------------------------------------------===//
 /// MemIndexedMode enum - This enum defines the load / store indexed
 /// addressing modes.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -377,6 +377,41 @@
   }
 }
 
+bool ISD::isVPOpcode(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    return false;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, ...)                                   \
+  case ISD::SDOPC:                                                             \
+    return true;
+#include "llvm/IR/VPIntrinsics.def"
+  }
+}
+
+/// The operand position of the vector mask.
+Optional<unsigned> ISD::getVPMaskIdx(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    return None;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, ...)        \
+  case ISD::SDOPC:                                                             \
+    return MASKPOS;
+#include "llvm/IR/VPIntrinsics.def"
+  }
+}
+
+/// The operand position of the explicit vector length parameter.
+Optional<unsigned> ISD::getVPExplicitVectorLengthIdx(unsigned Opcode) {
+  switch (Opcode) {
+  default:
+    return None;
+#define BEGIN_REGISTER_VP_SDNODE(SDOPC, LEGALPOS, TDNAME, MASKPOS, EVLPOS)     \
+  case ISD::SDOPC:                                                             \
+    return EVLPOS;
+#include "llvm/IR/VPIntrinsics.def"
+  }
+}
+
 ISD::NodeType ISD::getExtForLoadExtType(bool IsFP, ISD::LoadExtType ExtType) {
   switch (ExtType) {
   case ISD::EXTLOAD:
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -285,6 +285,8 @@
     // TODO We will custom-widen into VVP_* nodes in the future. While we are
     // buildling the infrastructure for this, we only do this for legal vector
     // VTs.
+#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME)                                     \
+    setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
 #define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
     setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
 #include "VVPNodes.def"
@@ -1561,7 +1563,11 @@
 }
 
 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
-  switch (Op.getOpcode()) {
+  unsigned Opcode = Op.getOpcode();
+  if (ISD::isVPOpcode(Opcode))
+    return lowerToVVP(Op, DAG);
+
+  switch (Opcode) {
   default:
     llvm_unreachable("Should not custom lower this!");
   case ISD::ATOMIC_FENCE:
@@ -1877,6 +1883,9 @@
 /// \returns the VVP_* SDNode opcode corresponsing to \p OC.
 static Optional<unsigned> getVVPOpcode(unsigned OC) {
   switch (OC) {
+#define HANDLE_VP_TO_VVP(VPOPC, VVPNAME)                                       \
+  case ISD::VPOPC:                                                             \
+    return VEISD::VVPNAME;
 #define ADD_VVP_OP(VVPNAME, SDNAME)                                            \
   case VEISD::VVPNAME:                                                         \
   case ISD::SDNAME:                                                            \
@@ -1888,22 +1897,36 @@
 
 SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
   // Can we represent this as a VVP node.
-  auto OCOpt = getVVPOpcode(Op->getOpcode());
+  const unsigned Opcode = Op->getOpcode();
+  auto OCOpt = getVVPOpcode(Opcode);
   if (!OCOpt.hasValue())
     return SDValue();
   unsigned VVPOC = OCOpt.getValue();
+  const bool FromVP = ISD::isVPOpcode(Opcode);
 
   // The representative and legalized vector type of this operation.
+  SDLoc DL(Op);
+  MVT MaskVT = MVT::v256i1; // TODO: packed mode.
   EVT OpVecVT = Op.getValueType();
   EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
 
-  // Materialize the VL parameter.
-  SDLoc DL(Op);
-  SDValue AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
-  MVT MaskVT = MVT::v256i1;
-  SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
-  SDValue Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
-                             ConstTrue); // emit a VEISD::VEC_BROADCAST here.
+  SDValue AVL;
+  SDValue Mask;
+
+  if (FromVP) {
+    // All upstream VP SDNodes always have a mask and avl.
+    auto MaskIdx = ISD::getVPMaskIdx(Opcode).getValue();
+    auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode).getValue();
+    Mask = Op->getOperand(MaskIdx);
+    AVL = Op->getOperand(AVLIdx);
+
+  } else {
+    // Materialize the VL parameter.
+    AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
+    SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
+    Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
+                       ConstTrue); // emit a VEISD::VEC_BROADCAST here.
+  }
 
   // Categories we are interested in.
   bool IsBinaryOp = false;
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -10,6 +10,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+/// HANDLE_VP_TO_VVP(VPOPC, VVPOPC)
+/// \p VPOPC is the VP_* SDNode opcode.
+/// \p VVPOPC is the VVP_* SDNode opcode.
+#ifndef HANDLE_VP_TO_VVP
+#define HANDLE_VP_TO_VVP(VPOPC, VVPOPC)
+#endif
+
 /// ADD_VVP_OP(VVPNAME,SDNAME)
 /// \p VVPName is a VVP SDNode operator.
 /// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
@@ -21,7 +28,7 @@
 /// \p VVPName is a VVP Binary operator.
 /// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
 #ifndef ADD_BINARY_VVP_OP
-#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y)
+#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y) HANDLE_VP_TO_VVP(VP_##Y, X)
 #endif
 
 // Integer arithmetic.
@@ -29,5 +36,6 @@
 ADD_BINARY_VVP_OP(VVP_AND,AND)
 
+#undef HANDLE_VP_TO_VVP
 #undef ADD_BINARY_VVP_OP
 #undef ADD_VVP_OP
 
diff --git a/llvm/test/CodeGen/VE/Vector/vp_add.ll b/llvm/test/CodeGen/VE/Vector/vp_add.ll
--- a/llvm/test/CodeGen/VE/Vector/vp_add.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_add.ll
@@ -1,16 +1,29 @@
-; REQUIRES: asserts
-; RUN: not --crash llc %s -march=ve -mattr=+vpu -o /dev/null |& FileCheck %s
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-; CHECK: t{{[0-9]+}}: v256i32 = vp_add [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]]
-; CHECK: [[A]]: v256i32
-; CHECK: [[B]]: v256i32
-; CHECK: [[MASK]]: v256i1
-; CHECK: [[EVL]]: i32
+declare <256 x i32> @llvm.vp.add.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_add_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_add_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.w.sx %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.add.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
 
-; integer arith
-declare <256 x i32> @llvm.vp.add.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.add.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_int_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vadds.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.add.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+
diff --git a/llvm/test/CodeGen/VE/Vector/vp_and.ll b/llvm/test/CodeGen/VE/Vector/vp_and.ll
--- a/llvm/test/CodeGen/VE/Vector/vp_and.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_and.ll
@@ -1,16 +1,28 @@
-; REQUIRES: asserts
-; RUN: not --crash llc %s -march=ve -mattr=+vpu -o /dev/null |& FileCheck %s
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-; CHECK: t{{[0-9]+}}: v256i32 = vp_and [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]]
-; CHECK: [[A]]: v256i32
-; CHECK: [[B]]: v256i32
-; CHECK: [[MASK]]: v256i1
-; CHECK: [[EVL]]: i32
+declare <256 x i32> @llvm.vp.and.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_and_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_and_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvand.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.and.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
 
-; integer arith
-declare <256 x i32> @llvm.vp.and.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.and.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_int_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vand %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.and.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}