diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h --- a/llvm/include/llvm/CodeGen/ISDOpcodes.h +++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -1150,6 +1150,10 @@ VECREDUCE_UMAX, VECREDUCE_UMIN, +// Vector Predication +#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) VPSDID, +#include "llvm/IR/VPIntrinsics.def" + /// BUILTIN_OP_END - This must be the last enum value in this list. /// The target-specific pre-isel opcode values start here. BUILTIN_OP_END diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -17,68 +17,137 @@ // Provide definitions of macros so that users of this file do not have to // define everything to use it... // -#ifndef REGISTER_VP_INTRINSIC -#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) +// Register a VP intrinsic and begin its property scope. +// All VP intrinsic scopes are top level, ie it is illegal to place a +// BEGIN_REGISTER_VP_INTRINSIC within a VP intrinsic scope. +#ifndef BEGIN_REGISTER_VP_INTRINSIC +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) #endif -// Map this VP intrinsic to its functional Opcode +// End the property scope of a VP intrinsic. +#ifndef END_REGISTER_VP_INTRINSIC +#define END_REGISTER_VP_INTRINSIC(INTRINID) +#endif + +// Register a new VP SDNode and begin its property scope. +// When the SDNode scope is nested within a VP intrinsic scope, it is implicitly registered as the canonical SDNode for this VP intrinsic. +// There is one VP intrinsic that maps directly to one SDNode that goes by the +// same name. Since the operands are also the same, we open the property +// scopes for both the VPIntrinsic and the SDNode at once. +// \p SDID The SelectionDAG Node id (eg VP_ADD). +// \p LEGALPOS The operand position of the SDNode that is used for legalizing +// this SDNode. This can be `-1`, in which case the return type of +// the SDNode is used. +// \p TDNAME The name of the TableGen definition of this SDNode. +// \p MASKPOS The mask operand position. +// \p EVLPOS The explicit vector length operand position. +#ifndef BEGIN_REGISTER_VP_SDNODE +#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALPOS, TDNAME, MASKPOS, EVLPOS) +#endif + +// End the property scope of a new VP SDNode. +#ifndef END_REGISTER_VP_SDNODE +#define END_REGISTER_VP_SDNODE(SDID) +#endif + +// Helper macros for the common "1:1 - Intrinsic : SDNode" case. +// +// There is one VP intrinsic that maps directly to one SDNode that goes by the +// same name. Since the operands are also the same, we open the property +// scopes for both the VPIntrinsic and the SDNode at once. +// +// \p INTRIN The cannonical name (eg `vp_add`, which at the same time is the +// name of the intrinsic and the TableGen def of the SDNode). +// \p MASKPOS The mask operand position. +// \p EVLPOS The explicit vector length operand position. +// \p SDID The SelectionDAG Node id (eg VP_ADD). +// \p LEGALPOS The operand position of the SDNode that is used for legalizing +// this SDNode. This can be `-1`, in which case the return type of +// the SDNode is used. +#define BEGIN_REGISTER_VP(INTRIN, MASKPOS, EVLPOS, SDID, LEGALPOS) \ +BEGIN_REGISTER_VP_INTRINSIC(INTRIN, MASKPOS, EVLPOS) \ +BEGIN_REGISTER_VP_SDNODE(SDID, LEGALPOS, INTRIN, MASKPOS, EVLPOS) + +#define END_REGISTER_VP(INTRIN, SDID) \ +END_REGISTER_VP_INTRINSIC(INTRIN) \ +END_REGISTER_VP_SDNODE(SDID) + + +// The following macros attach properties to the scope they are placed in. This +// assigns the property to the VP Intrinsic and/or SDNode that belongs to the +// scope. +// +// Property Macros { + +// The intrinsic and/or SDNode has the same function as this LLVM IR Opcode. +// \p OPC The standard IR opcode. #ifndef HANDLE_VP_TO_OPC -#define HANDLE_VP_TO_OPC(VPID, OC) +#define HANDLE_VP_TO_OPC(OPC) +#endif + +/// } Property Macros + +///// Integer Arithmetic { + +// Specialized helper macro for integer binary operators (%x, %y, %mask, %evl). +#ifdef HELPER_REGISTER_BINARY_INT_VP +#error "The internal helper macro HELPER_REGISTER_BINARY_INT_VP is already defined!" #endif +#define HELPER_REGISTER_BINARY_INT_VP(INTRIN, SDID, OPC) \ +BEGIN_REGISTER_VP(INTRIN, 2, 3, SDID, -1) \ +HANDLE_VP_TO_OPC(OPC) \ +END_REGISTER_VP(INTRIN, SDID) + -///// Integer Arithmetic ///// // llvm.vp.add(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_add, 2, 3) -HANDLE_VP_TO_OPC(vp_add, Add) +HELPER_REGISTER_BINARY_INT_VP(vp_add, VP_ADD, Add) // llvm.vp.and(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_and, 2, 3) -HANDLE_VP_TO_OPC(vp_and, And) +HELPER_REGISTER_BINARY_INT_VP(vp_and, VP_AND, And) // llvm.vp.ashr(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_ashr, 2, 3) -HANDLE_VP_TO_OPC(vp_ashr, AShr) +HELPER_REGISTER_BINARY_INT_VP(vp_ashr, VP_ASHR, AShr) // llvm.vp.lshr(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_lshr, 2, 3) -HANDLE_VP_TO_OPC(vp_lshr, LShr) +HELPER_REGISTER_BINARY_INT_VP(vp_lshr, VP_LSHR, LShr) // llvm.vp.mul(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_mul, 2, 3) -HANDLE_VP_TO_OPC(vp_mul, Mul) +HELPER_REGISTER_BINARY_INT_VP(vp_mul, VP_MUL, Mul) // llvm.vp.or(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_or, 2, 3) -HANDLE_VP_TO_OPC(vp_or, Or) +HELPER_REGISTER_BINARY_INT_VP(vp_or, VP_OR, Or) // llvm.vp.sdiv(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_sdiv, 2, 3) -HANDLE_VP_TO_OPC(vp_sdiv, SDiv) +HELPER_REGISTER_BINARY_INT_VP(vp_sdiv, VP_SDIV, SDiv) // llvm.vp.shl(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_shl, 2, 3) -HANDLE_VP_TO_OPC(vp_shl, Shl) +HELPER_REGISTER_BINARY_INT_VP(vp_shl, VP_SHL, Shl) // llvm.vp.srem(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_srem, 2, 3) -HANDLE_VP_TO_OPC(vp_srem, SRem) +HELPER_REGISTER_BINARY_INT_VP(vp_srem, VP_SREM, SRem) // llvm.vp.sub(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_sub, 2, 3) -HANDLE_VP_TO_OPC(vp_sub, Sub) +HELPER_REGISTER_BINARY_INT_VP(vp_sub, VP_Sub, Sub) // llvm.vp.udiv(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_udiv, 2, 3) -HANDLE_VP_TO_OPC(vp_udiv, UDiv) +HELPER_REGISTER_BINARY_INT_VP(vp_udiv, VP_UDIV, UDiv) // llvm.vp.urem(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_urem, 2, 3) -HANDLE_VP_TO_OPC(vp_urem, URem) +HELPER_REGISTER_BINARY_INT_VP(vp_urem, VP_UREM, URem) // llvm.vp.xor(x,y,mask,vlen) -REGISTER_VP_INTRINSIC(vp_xor, 2, 3) -HANDLE_VP_TO_OPC(vp_xor, Xor) +HELPER_REGISTER_BINARY_INT_VP(vp_xor, VP_XOR, Xor) + +#undef HELPER_REGISTER_BINARY_INT_VP + +///// } Integer Arithmetic + -#undef REGISTER_VP_INTRINSIC +#undef BEGIN_REGISTER_VP +#undef BEGIN_REGISTER_VP_INTRINSIC +#undef BEGIN_REGISTER_VP_SDNODE +#undef END_REGISTER_VP +#undef END_REGISTER_VP_INTRINSIC +#undef END_REGISTER_VP_SDNODE #undef HANDLE_VP_TO_OPC diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -759,6 +759,7 @@ void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI); + void visitVectorPredicationIntrinsic(const VPIntrinsic &VPI); void visitVAStart(const CallInst &I); void visitVAArg(const VAArgInst &I); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6137,6 +6137,10 @@ #include "llvm/IR/ConstrainedOps.def" visitConstrainedFPIntrinsic(cast(I)); return; +#define BEGIN_REGISTER_VP_INTRINSIC(VPINTRIN, ...) case Intrinsic::VPINTRIN: +#include "llvm/IR/VPIntrinsics.def" + visitVectorPredicationIntrinsic(cast(I)); + return; case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && @@ -7007,6 +7011,40 @@ setValue(&FPI, FPResult); } +static Optional getISDForVPIntrinsic(const VPIntrinsic &VPIntrin) { + Optional ResID; + unsigned IntrinID = VPIntrin.getIntrinsicID(); + + switch (IntrinID) { +#define BEGIN_REGISTER_VP_INTRINSIC(INTRIN, ...) case Intrinsic::INTRIN: +#define BEGIN_REGISTER_VP_SDNODE(VPSDID, ...) ResID = ISD::VPSDID; +#define END_REGISTER_VP_INTRINSIC(...) break; +#include "llvm/IR/VPIntrinsics.def" + } + + return ResID; +} + +void SelectionDAGBuilder::visitVectorPredicationIntrinsic( + const VPIntrinsic &VPI) { + unsigned Opcode = getISDForVPIntrinsic(VPI).getValue(); + + SmallVector ValueVTs; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + ComputeValueVTs(TLI, DAG.getDataLayout(), VPI.getType(), ValueVTs); + SDVTList VTs = DAG.getVTList(ValueVTs); + + // Request operands. + SmallVector OpValues; + for (int i = 0; i < (int)VPI.getNumArgOperands(); ++i) { + OpValues.push_back(getValue(VPI.getArgOperand(i))); + } + + SDLoc DL = getCurSDLoc(); + SDValue Result = DAG.getNode(Opcode, DL, VTs, OpValues); + setValue(&VPI, Result); +} + std::pair SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI, const BasicBlock *EHPadBB) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -466,6 +466,12 @@ case ISD::VECREDUCE_UMIN: return "vecreduce_umin"; case ISD::VECREDUCE_FMAX: return "vecreduce_fmax"; case ISD::VECREDUCE_FMIN: return "vecreduce_fmin"; + + // Vector Predication +#define BEGIN_REGISTER_VP_SDNODE(SDID, LEGALARG, NAME, ...) \ + case ISD::SDID: \ + return #NAME; +#include "llvm/IR/VPIntrinsics.def" } } diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -208,7 +208,7 @@ default: return None; -#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \ +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \ case Intrinsic::VPID: \ return MASKPOS; #include "llvm/IR/VPIntrinsics.def" @@ -220,7 +220,7 @@ default: return None; -#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \ +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \ case Intrinsic::VPID: \ return VLENPOS; #include "llvm/IR/VPIntrinsics.def" @@ -232,7 +232,7 @@ default: return false; -#define REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \ +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, MASKPOS, VLENPOS) \ case Intrinsic::VPID: \ break; #include "llvm/IR/VPIntrinsics.def" @@ -242,15 +242,17 @@ // Equivalent non-predicated opcode unsigned VPIntrinsic::GetFunctionalOpcodeForVP(Intrinsic::ID ID) { + unsigned FunctionalOC = Instruction::Call; switch (ID) { default: - return Instruction::Call; - -#define HANDLE_VP_TO_OPC(VPID, OPC) \ - case Intrinsic::VPID: \ - return Instruction::OPC; + break; +#define BEGIN_REGISTER_VP_INTRINSIC(VPID, ...) case Intrinsic::VPID: +#define HANDLE_VP_TO_OPC(OPC) FunctionalOC = Instruction::OPC; +#define END_REGISTER_VP_INTRINSIC(...) break; #include "llvm/IR/VPIntrinsics.def" } + + return FunctionalOC; } Intrinsic::ID VPIntrinsic::GetForOpcode(unsigned IROPC) { @@ -258,9 +260,8 @@ default: return Intrinsic::not_intrinsic; -#define HANDLE_VP_TO_OPC(VPID, OPC) \ - case Instruction::OPC: \ - return Intrinsic::VPID; +#define HANDLE_VP_TO_OPC(OPC) case Instruction::OPC: +#define END_REGISTER_VP_INTRINSIC(VPID) return Intrinsic::VPID; #include "llvm/IR/VPIntrinsics.def" } } diff --git a/llvm/test/CodeGen/VE/Vector/vp_add.ll b/llvm/test/CodeGen/VE/Vector/vp_add.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/Vector/vp_add.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=ve -mattr=+vpu |& FileCheck %s + +; CHECK: vp_add +; XFAIL: * + +define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) { + %r0 = call <256 x i32> @llvm.vp.add.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) + ret <256 x i32> %r0 +} + +; integer arith +declare <256 x i32> @llvm.vp.add.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32) diff --git a/llvm/test/CodeGen/VE/Vector/vp_sub.ll b/llvm/test/CodeGen/VE/Vector/vp_sub.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/Vector/vp_sub.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=ve -mattr=+vpu |& FileCheck %s + +; CHECK: vp_sub +; XFAIL: * + +define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) { + %r0 = call <256 x i32> @llvm.vp.sub.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) + ret <256 x i32> %r0 +} + +; integer arith +declare <256 x i32> @llvm.vp.sub.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)