diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -1720,7 +1720,7 @@
   case ISD::EXTRACT_VECTOR_ELT:
     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
 
-#define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
+#define ADD_BINARY_VVP_OP(VVP_NAME, VP_NAME, ISD_NAME) case ISD::ISD_NAME:
 #include "VVPNodes.def"
     return lowerToVVP(Op, DAG);
   }
diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -54,4 +54,8 @@
 def vvp_xor : SDNode<"VEISD::VVP_XOR", SDTIntBinOpVVP>;
 def c_vvp_xor : vvp_commutative<vvp_xor>;
 
+def vvp_srl : SDNode<"VEISD::VVP_SRL", SDTIntBinOpVVP>;
+def vvp_sra : SDNode<"VEISD::VVP_SRA", SDTIntBinOpVVP>;
+def vvp_shl : SDNode<"VEISD::VVP_SHL", SDTIntBinOpVVP>;
+
 // } Binary Operators
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -39,6 +39,28 @@
                 ScalarVT:$sx, $vy, $mask, $avl)>;
 }
 
+multiclass Binary_vr<SDPatternOperator OpNode,
+    ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
+    string OpBaseName> {
+  // Masked with select, broadcast.
+  // TODO
+
+  // Unmasked, broadcast.
+  def : Pat<(OpNode
+                DataVT:$vx, (any_broadcast ScalarVT:$sy),
+                (MaskVT true_mask),
+                i32:$avl),
+            (!cast<Instruction>(OpBaseName#"vrl")
+                $vx, ScalarVT:$sy, $avl)>;
+  // Masked, broadcast.
+  def : Pat<(OpNode
+                DataVT:$vx, (any_broadcast ScalarVT:$sy),
+                MaskVT:$mask,
+                i32:$avl),
+            (!cast<Instruction>(OpBaseName#"vrml")
+                $vx, ScalarVT:$sy, $mask, $avl)>;
+}
+
 multiclass Binary_vv<SDPatternOperator OpNode,
     ValueType DataVT, ValueType MaskVT,
     string OpBaseName> {
@@ -70,6 +92,14 @@
   defm : Binary_vv<OpNode, DataVT, MaskVT, OpBaseName>;
 }
 
+multiclass Binary_vr_vv<
+    SDPatternOperator OpNode,
+    ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
+    string OpBaseName> {
+  defm : Binary_vr<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+  defm : Binary_vv<OpNode, DataVT, MaskVT, OpBaseName>;
+}
+
 // Expand both 64bit and 32 bit variant (256 elements)
 multiclass Binary_rv_vv_ShortLong<
     SDPatternOperator OpNode,
@@ -83,6 +113,17 @@
                       ShortOpBaseName>;
 }
 
+multiclass Binary_vr_vv_ShortLong<
+    SDPatternOperator OpNode,
+    ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+    ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+  defm : Binary_vr_vv<OpNode,
+                      LongScalarVT, LongDataVT, v256i1,
+                      LongOpBaseName>;
+  defm : Binary_vr_vv<OpNode,
+                      ShortScalarVT, ShortDataVT, v256i1,
+                      ShortOpBaseName>;
+}
 
 defm : Binary_rv_vv_ShortLong<c_vvp_xor,
                               i64, v256i64, "VXOR",
                               i32, v256i32, "PVXORLO">;
+defm : Binary_vr_vv_ShortLong<vvp_shl,
+                              i64, v256i64, "VSLL",
+                              i32, v256i32, "PVSLLLO">;
+defm : Binary_vr_vv_ShortLong<vvp_sra,
+                              i64, v256i64, "VSRAL",
+                              i32, v256i32, "PVSRALO">;
+defm : Binary_vr_vv_ShortLong<vvp_srl,
+                              i64, v256i64, "VSRL",
+                              i32, v256i32, "PVSRLLO">;
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -28,18 +28,30 @@
 /// \p VVPName is a VVP Binary operator.
 /// \p SDNAME is the generic SD opcode corresponding to \p VVPName.
 #ifndef ADD_BINARY_VVP_OP
-#define ADD_BINARY_VVP_OP(X,Y) ADD_VVP_OP(X,Y) HANDLE_VP_TO_VVP(VP_##Y, X)
+#define ADD_BINARY_VVP_OP(VVPNAME,VPNAME,SDNAME) \
+  ADD_VVP_OP(VVPNAME,SDNAME)                     \
+  HANDLE_VP_TO_VVP(VPNAME, VVPNAME)
+#endif
+
+#ifndef ADD_BINARY_VVP_OP_COMPACT
+#define ADD_BINARY_VVP_OP_COMPACT(NAME) \
+  ADD_BINARY_VVP_OP(VVP_##NAME,VP_##NAME,NAME)
 #endif
 
 // Integer arithmetic.
-ADD_BINARY_VVP_OP(VVP_ADD,ADD)
-ADD_BINARY_VVP_OP(VVP_SUB,SUB)
-ADD_BINARY_VVP_OP(VVP_MUL,MUL)
+ADD_BINARY_VVP_OP_COMPACT(ADD)
+ADD_BINARY_VVP_OP_COMPACT(SUB)
+ADD_BINARY_VVP_OP_COMPACT(MUL)
 
-ADD_BINARY_VVP_OP(VVP_AND,AND)
-ADD_BINARY_VVP_OP(VVP_OR,OR)
-ADD_BINARY_VVP_OP(VVP_XOR,XOR)
+ADD_BINARY_VVP_OP(VVP_SRA,VP_ASHR,SRA)
+ADD_BINARY_VVP_OP(VVP_SRL,VP_LSHR,SRL)
+ADD_BINARY_VVP_OP_COMPACT(SHL)
+
+ADD_BINARY_VVP_OP_COMPACT(AND)
+ADD_BINARY_VVP_OP_COMPACT(OR)
+ADD_BINARY_VVP_OP_COMPACT(XOR)
 
-#undef HANDLE_VP_TO_VVP
 #undef ADD_BINARY_VVP_OP
+#undef ADD_BINARY_VVP_OP_COMPACT
 #undef ADD_VVP_OP
+#undef HANDLE_VP_TO_VVP
diff --git a/llvm/test/CodeGen/VE/Vector/vp_ashr.ll b/llvm/test/CodeGen/VE/Vector/vp_ashr.ll
--- a/llvm/test/CodeGen/VE/Vector/vp_ashr.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_ashr.ll
@@ -1,16 +1,29 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-; CHECK: t{{[0-9]+}}: v256i32 = vp_ashr [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]]
-; CHECK: [[A]]: v256i32
-; CHECK: [[B]]: v256i32
-; CHECK: [[MASK]]: v256i1
-; CHECK: [[EVL]]: i32
+declare <256 x i32> @llvm.vp.ashr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvsra.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.ashr.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
-; integer arith
-declare <256 x i32> @llvm.vp.ashr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.ashr.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsra.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.ashr.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Vector/vp_lshr.ll b/llvm/test/CodeGen/VE/Vector/vp_lshr.ll
--- a/llvm/test/CodeGen/VE/Vector/vp_lshr.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_lshr.ll
@@ -1,16 +1,30 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-; CHECK: t{{[0-9]+}}: v256i32 = vp_lshr [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]]
-; CHECK: [[A]]: v256i32
-; CHECK: [[B]]: v256i32
-; CHECK: [[MASK]]: v256i1
-; CHECK: [[EVL]]: i32
+declare <256 x i32> @llvm.vp.lshr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvsrl.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.lshr.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
-; integer arith
-declare <256 x i32> @llvm.vp.lshr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.lshr.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsrl %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.lshr.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+
diff --git a/llvm/test/CodeGen/VE/Vector/vp_shl.ll b/llvm/test/CodeGen/VE/Vector/vp_shl.ll
--- a/llvm/test/CodeGen/VE/Vector/vp_shl.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_shl.ll
@@ -1,16 +1,29 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-; CHECK: t{{[0-9]+}}: v256i32 = vp_shl [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]]
-; CHECK: [[A]]: v256i32
-; CHECK: [[B]]: v256i32
-; CHECK: [[MASK]]: v256i1
-; CHECK: [[EVL]]: i32
+declare <256 x i32> @llvm.vp.shl.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvsll.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.shl.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
-; integer arith
-declare <256 x i32> @llvm.vp.shl.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+declare <256 x i64> @llvm.vp.shl.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsll %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.shl.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Vector/vp_sra.ll b/llvm/test/CodeGen/VE/Vector/vp_sra.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_sra.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x i32> @llvm.vp.ashr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+define fastcc <256 x i32> @test_vp_ashr_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_ashr_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvsra.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i32> @llvm.vp.ashr.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i32> %r0
+}
+
+
+declare <256 x i64> @llvm.vp.ashr.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_int_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsra.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.ashr.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+
diff --git a/llvm/test/CodeGen/VE/Vector/vp_srl.ll b/llvm/test/CodeGen/VE/Vector/vp_srl.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/vp_srl.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+declare <256 x i32> @llvm.vp.lshr.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+
+define fastcc <256 x i32> @test_vp_lshr_v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_lshr_v256i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    pvsrl.lo %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i32> @llvm.vp.lshr.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i32> %r0
+}
+
+
+declare <256 x i64> @llvm.vp.lshr.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_int_v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int_v256i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vsrl %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.lshr.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+
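Note (not part of the patch): the tests above only exercise the vector-vector forms, so the new Binary_vr broadcast patterns are left uncovered. A hypothetical follow-up test, assuming the VE any_broadcast matcher recognizes an insertelement/shufflevector splat of the shift amount, could look like the sketch below; the function name and the expectation that the vrl/vrml form is selected are assumptions, not verified output.

; Sketch of a splat-shift-amount test (hypothetical, not in this patch).
; If the splat is matched as a broadcast, the Binary_vr patterns may select
; the scalar-operand instruction form instead of materializing the splat.
declare <256 x i64> @llvm.vp.shl.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)

define fastcc <256 x i64> @shl_splat_v256i64(<256 x i64> %v, i64 %s, <256 x i1> %m, i32 %n) {
  %head = insertelement <256 x i64> undef, i64 %s, i32 0
  %splat = shufflevector <256 x i64> %head, <256 x i64> undef, <256 x i32> zeroinitializer
  %r = call <256 x i64> @llvm.vp.shl.v256i64(<256 x i64> %v, <256 x i64> %splat, <256 x i1> %m, i32 %n)
  ret <256 x i64> %r
}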