diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -45,6 +45,9 @@
 def vvp_mul : SDNode<"VEISD::VVP_MUL", SDTIntBinOpVVP>;
 def c_vvp_mul : vvp_commutative<vvp_mul>;
 
+def vvp_sdiv : SDNode<"VEISD::VVP_SDIV", SDTIntBinOpVVP>;
+def vvp_udiv : SDNode<"VEISD::VVP_UDIV", SDTIntBinOpVVP>;
+
 def vvp_and : SDNode<"VEISD::VVP_AND", SDTIntBinOpVVP>;
 def c_vvp_and : vvp_commutative<vvp_and>;
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -100,6 +100,14 @@
   defm : Binary_vv<OpNode, DataVT, MaskVT, OpBaseName>;
 }
 
+multiclass Binary_rv_vr_vv<
+    SDPatternOperator OpNode,
+    ValueType ScalarVT, ValueType DataVT, ValueType MaskVT,
+    string OpBaseName> {
+  defm : Binary_rv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+  defm : Binary_vr_vv<OpNode, ScalarVT, DataVT, MaskVT, OpBaseName>;
+}
+
 // Expand both 64bit and 32 bit variant (256 elements)
 multiclass Binary_rv_vv_ShortLong<
@@ -125,6 +133,18 @@
                       ShortOpBaseName>;
 }
 
+multiclass Binary_rv_vr_vv_ShortLong<
+    SDPatternOperator OpNode,
+    ValueType LongScalarVT, ValueType LongDataVT, string LongOpBaseName,
+    ValueType ShortScalarVT, ValueType ShortDataVT, string ShortOpBaseName> {
+  defm : Binary_rv_vr_vv<OpNode,
+                         LongScalarVT, LongDataVT, v256i1,
+                         LongOpBaseName>;
+  defm : Binary_rv_vr_vv<OpNode,
+                         ShortScalarVT, ShortDataVT, v256i1,
+                         ShortOpBaseName>;
+}
+
 defm : Binary_rv_vv_ShortLong<c_vvp_add,
                               i64, v256i64, "VADDSL",
                               i32, v256i32, "VADDSWSX">;
@@ -134,6 +154,12 @@
 defm : Binary_rv_vv_ShortLong<c_vvp_mul,
                               i64, v256i64, "VMULSL",
                               i32, v256i32, "VMULSWSX">;
+defm : Binary_rv_vr_vv_ShortLong<vvp_sdiv,
+                                 i64, v256i64, "VDIVSL",
+                                 i32, v256i32, "VDIVSWSX">;
+defm : Binary_rv_vr_vv_ShortLong<vvp_udiv,
+                                 i64, v256i64, "VDIVUL",
+                                 i32, v256i32, "VDIVUW">;
 defm : Binary_rv_vv_ShortLong<c_vvp_and,
                               i64, v256i64, "VAND",
                               i32, v256i32, "PVANDLO">;
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -42,6 +42,8 @@
 ADD_BINARY_VVP_OP_COMPACT(ADD)
 ADD_BINARY_VVP_OP_COMPACT(SUB)
 ADD_BINARY_VVP_OP_COMPACT(MUL)
+ADD_BINARY_VVP_OP_COMPACT(UDIV)
+ADD_BINARY_VVP_OP_COMPACT(SDIV)
 ADD_BINARY_VVP_OP(VVP_SRA,VP_ASHR,SRA)
 ADD_BINARY_VVP_OP(VVP_SRL,VP_LSHR,SRL)
diff --git a/llvm/test/CodeGen/VE/Vector/vp_sdiv.ll b/llvm/test/CodeGen/VE/Vector/vp_sdiv.ll
--- a/llvm/test/CodeGen/VE/Vector/vp_sdiv.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_sdiv.ll
@@ -1,16 +1,83 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-; CHECK: t{{[0-9]+}}: v256i32 = vp_sdiv [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]]
-; CHECK: [[A]]: v256i32
-; CHECK: [[B]]: v256i32
-; CHECK: [[MASK]]: v256i1
-; CHECK: [[EVL]]: i32
+declare <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_sdiv_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v256i32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vdivs.w.sx %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
 
-; integer arith
-declare <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+define fastcc <256 x i32> @test_vp_sdiv_v256i32_rv(i32 %s0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v256i32_rv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivs.w.sx %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %s0, i32 0
+  %i0 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i32> %r0
+}
+
+define fastcc <256 x i32> @test_vp_sdiv_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v256i32_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivs.w.sx %v0, %v0, %s0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i32> undef, i32 %s1, i32 0
+  %i1 = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i32> @llvm.vp.sdiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i32> %r0
+}
+
+
+declare <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_int_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int_v256i64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vdivs.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_sdiv_v256i64_rv(i64 %s0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v256i64_rv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivs.l %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %s0, i32 0
+  %i0 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_sdiv_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_sdiv_v256i64_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivs.l %v0, %v0, %s0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i64> undef, i64 %s1, i32 0
+  %i1 = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i64> @llvm.vp.sdiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
diff --git a/llvm/test/CodeGen/VE/Vector/vp_udiv.ll b/llvm/test/CodeGen/VE/Vector/vp_udiv.ll
--- a/llvm/test/CodeGen/VE/Vector/vp_udiv.ll
+++ b/llvm/test/CodeGen/VE/Vector/vp_udiv.ll
@@ -1,16 +1,83 @@
-; REQUIRES: asserts
-; RUN: not --crash llc < %s -march=ve -mattr=+vpu -o /dev/null 2>&1 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
 
-; CHECK: t{{[0-9]+}}: v256i32 = vp_udiv [[A:t[0-9]+]], [[B:t[0-9]+]], [[MASK:t[0-9]+]], [[EVL:t[0-9]+]]
-; CHECK: [[A]]: v256i32
-; CHECK: [[B]]: v256i32
-; CHECK: [[MASK]]: v256i1
-; CHECK: [[EVL]]: i32
+declare <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
 
-define <256 x i32> @test_vp_int(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+define fastcc <256 x i32> @test_vp_udiv_v256i32_vv(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v256i32_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vdivu.w %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
   %r0 = call <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
   ret <256 x i32> %r0
 }
 
-; integer arith
-declare <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32>, <256 x i32>, <256 x i1>, i32)
+define fastcc <256 x i32> @test_vp_udiv_v256i32_rv(i32 %s0, <256 x i32> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v256i32_rv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivu.w %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i32> undef, i32 %s0, i32 0
+  %i0 = shufflevector <256 x i32> %xins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i32> %r0
+}
+
+define fastcc <256 x i32> @test_vp_udiv_v256i32_vr(<256 x i32> %i0, i32 %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v256i32_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivu.w %v0, %v0, %s0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i32> undef, i32 %s1, i32 0
+  %i1 = shufflevector <256 x i32> %yins, <256 x i32> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i32> @llvm.vp.udiv.v256i32(<256 x i32> %i0, <256 x i32> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i32> %r0
+}
+
+
+declare <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64>, <256 x i64>, <256 x i1>, i32)
+
+define fastcc <256 x i64> @test_vp_int_v256i64_vv(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_int_v256i64_vv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lvl %s0
+; CHECK-NEXT:    vdivu.l %v0, %v0, %v1, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_udiv_v256i64_rv(i64 %s0, <256 x i64> %i1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v256i64_rv:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivu.l %v0, %s0, %v0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %xins = insertelement <256 x i64> undef, i64 %s0, i32 0
+  %i0 = shufflevector <256 x i64> %xins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}
+
+define fastcc <256 x i64> @test_vp_udiv_v256i64_vr(<256 x i64> %i0, i64 %s1, <256 x i1> %m, i32 %n) {
+; CHECK-LABEL: test_vp_udiv_v256i64_vr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lvl %s1
+; CHECK-NEXT:    vdivu.l %v0, %v0, %s0, %vm1
+; CHECK-NEXT:    b.l.t (, %s10)
+  %yins = insertelement <256 x i64> undef, i64 %s1, i32 0
+  %i1 = shufflevector <256 x i64> %yins, <256 x i64> undef, <256 x i32> zeroinitializer
+  %r0 = call <256 x i64> @llvm.vp.udiv.v256i64(<256 x i64> %i0, <256 x i64> %i1, <256 x i1> %m, i32 %n)
+  ret <256 x i64> %r0
+}