diff --git a/llvm/lib/Target/VE/VVPInstrInfo.td b/llvm/lib/Target/VE/VVPInstrInfo.td
--- a/llvm/lib/Target/VE/VVPInstrInfo.td
+++ b/llvm/lib/Target/VE/VVPInstrInfo.td
@@ -29,6 +29,16 @@
   IsVLVT<4>
 ]>;
 
+// BinaryFPOp(x,y,mask,vl)
+def SDTFPBinOpVVP : SDTypeProfile<1, 4, [    // vvp_fadd, etc.
+  SDTCisSameAs<0, 1>,
+  SDTCisSameAs<0, 2>,
+  SDTCisFP<0>,
+  SDTCisInt<3>,
+  SDTCisSameNumEltsAs<0, 3>,
+  IsVLVT<4>
+]>;
+
 // Binary operator commutative pattern.
 class vvp_commutative<SDNode RootOp> :
   PatFrags<
@@ -61,4 +71,11 @@
 def vvp_sra : SDNode<"VEISD::VVP_SRA", SDTIntBinOpVVP>;
 def vvp_shl : SDNode<"VEISD::VVP_SHL", SDTIntBinOpVVP>;
 
+def vvp_fadd : SDNode<"VEISD::VVP_FADD", SDTFPBinOpVVP>;
+def c_vvp_fadd : vvp_commutative<vvp_fadd>;
+def vvp_fsub : SDNode<"VEISD::VVP_FSUB", SDTFPBinOpVVP>;
+def vvp_fmul : SDNode<"VEISD::VVP_FMUL", SDTFPBinOpVVP>;
+def c_vvp_fmul : vvp_commutative<vvp_fmul>;
+def vvp_fdiv : SDNode<"VEISD::VVP_FDIV", SDTFPBinOpVVP>;
+
 // } Binary Operators
diff --git a/llvm/lib/Target/VE/VVPInstrPatternsVec.td b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
--- a/llvm/lib/Target/VE/VVPInstrPatternsVec.td
+++ b/llvm/lib/Target/VE/VVPInstrPatternsVec.td
@@ -178,3 +178,16 @@
 defm : Binary_vr_vv_ShortLong<vvp_shl,
                               v256i64, "VSLL",
                               v256i32, "PVSLL">;
+
+defm : Binary_rv_vv_ShortLong<c_vvp_fadd,
+                              v256f64, "VFADDD",
+                              v256f32, "PVFADDU">;
+defm : Binary_rv_vv_ShortLong<vvp_fsub,
+                              v256f64, "VFSUBD",
+                              v256f32, "PVFSUBU">;
+defm : Binary_rv_vv_ShortLong<c_vvp_fmul,
+                              v256f64, "VFMULD",
+                              v256f32, "PVFMULU">;
+defm : Binary_rv_vr_vv_ShortLong<vvp_fdiv,
+                                 v256f64, "VFDIVD",
+                                 v256f32, "VFDIVS">;
diff --git a/llvm/lib/Target/VE/VVPNodes.def b/llvm/lib/Target/VE/VVPNodes.def
--- a/llvm/lib/Target/VE/VVPNodes.def
+++ b/llvm/lib/Target/VE/VVPNodes.def
@@ -53,6 +53,12 @@
 ADD_BINARY_VVP_OP_COMPACT(OR)
 ADD_BINARY_VVP_OP_COMPACT(XOR)
 
+// FP arithmetic.
+ADD_BINARY_VVP_OP_COMPACT(FADD) +ADD_BINARY_VVP_OP_COMPACT(FSUB) +ADD_BINARY_VVP_OP_COMPACT(FMUL) +ADD_BINARY_VVP_OP_COMPACT(FDIV) + #undef ADD_BINARY_VVP_OP #undef ADD_BINARY_VVP_OP_COMPACT #undef ADD_VVP_OP diff --git a/llvm/test/CodeGen/VE/Vector/vp_fadd.ll b/llvm/test/CodeGen/VE/Vector/vp_fadd.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/Vector/vp_fadd.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s + +declare <256 x float> @llvm.vp.fadd.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32) + +define fastcc <256 x float> @test_vp_fadd_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvfadd.up %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fadd_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f32_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x float> undef, float %s0, i32 0 + %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fadd_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f32_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfadd.up %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement 
<256 x float> undef, float %s1, i32 0 + %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fadd.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + + +declare <256 x double> @llvm.vp.fadd.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32) + +define fastcc <256 x double> @test_vp_fadd_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfadd.d %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fadd_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f64_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x double> undef, double %s0, i32 0 + %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fadd_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fadd_v256f64_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfadd.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x double> undef, double %s1, i32 0 + %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fadd.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) 
+ ret <256 x double> %r0 +} diff --git a/llvm/test/CodeGen/VE/Vector/vp_fdiv.ll b/llvm/test/CodeGen/VE/Vector/vp_fdiv.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/Vector/vp_fdiv.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s + +declare <256 x float> @llvm.vp.fdiv.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32) + +define fastcc <256 x float> @test_vp_fdiv_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfdiv.s %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fdiv_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f32_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfdiv.s %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x float> undef, float %s0, i32 0 + %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fdiv_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f32_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfdiv.s %v0, %v0, %s0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x float> undef, float %s1, i32 0 + %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fdiv.v256f32(<256 x 
float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + + +declare <256 x double> @llvm.vp.fdiv.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32) + +define fastcc <256 x double> @test_vp_fdiv_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfdiv.d %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fdiv_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f64_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfdiv.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x double> undef, double %s0, i32 0 + %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fdiv_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fdiv_v256f64_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfdiv.d %v0, %v0, %s0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x double> undef, double %s1, i32 0 + %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fdiv.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} diff --git a/llvm/test/CodeGen/VE/Vector/vp_fmul.ll b/llvm/test/CodeGen/VE/Vector/vp_fmul.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/CodeGen/VE/Vector/vp_fmul.ll @@ -0,0 +1,81 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s + +declare <256 x float> @llvm.vp.fmul.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32) + +define fastcc <256 x float> @test_vp_fmul_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvfmul.up %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fmul_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f32_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x float> undef, float %s0, i32 0 + %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fmul_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f32_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfmul.up %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x float> undef, float %s1, i32 0 + %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fmul.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + + +declare <256 x double> @llvm.vp.fmul.v256f64(<256 x double>, <256 x 
double>, <256 x i1>, i32) + +define fastcc <256 x double> @test_vp_fmul_v256f64_vv(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfmul.d %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fmul_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f64_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x double> undef, double %s0, i32 0 + %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fmul_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fmul_v256f64_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfmul.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x double> undef, double %s1, i32 0 + %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fmul.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} diff --git a/llvm/test/CodeGen/VE/Vector/vp_fsub.ll b/llvm/test/CodeGen/VE/Vector/vp_fsub.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/Vector/vp_fsub.ll @@ -0,0 +1,87 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=ve -mattr=+vpu | 
FileCheck %s + +declare <256 x float> @llvm.vp.fsub.v256f32(<256 x float>, <256 x float>, <256 x i1>, i32) + +define fastcc <256 x float> @test_vp_fsub_v256f32_vv(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f32_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fsub_v256f32_rv(float %s0, <256 x float> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f32_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfsub.up %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x float> undef, float %s0, i32 0 + %i0 = shufflevector <256 x float> %xins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + +define fastcc <256 x float> @test_vp_fsub_v256f32_vr(<256 x float> %i0, float %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f32_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: lvl %s2 +; CHECK-NEXT: vbrd %v1, %s0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: pvfsub.up %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x float> undef, float %s1, i32 0 + %i1 = shufflevector <256 x float> %yins, <256 x float> undef, <256 x i32> zeroinitializer + %r0 = call <256 x float> @llvm.vp.fsub.v256f32(<256 x float> %i0, <256 x float> %i1, <256 x i1> %m, i32 %n) + ret <256 x float> %r0 +} + + +declare <256 x double> @llvm.vp.fsub.v256f64(<256 x double>, <256 x double>, <256 x i1>, i32) + +define fastcc <256 x double> @test_vp_fsub_v256f64_vv(<256 x double> 
%i0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f64_vv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s0, %s0, (32)0 +; CHECK-NEXT: lvl %s0 +; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fsub_v256f64_rv(double %s0, <256 x double> %i1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f64_rv: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfsub.d %v0, %s0, %v0, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %xins = insertelement <256 x double> undef, double %s0, i32 0 + %i0 = shufflevector <256 x double> %xins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +} + +define fastcc <256 x double> @test_vp_fsub_v256f64_vr(<256 x double> %i0, double %s1, <256 x i1> %m, i32 %n) { +; CHECK-LABEL: test_vp_fsub_v256f64_vr: +; CHECK: # %bb.0: +; CHECK-NEXT: and %s1, %s1, (32)0 +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: lvl %s2 +; CHECK-NEXT: vbrd %v1, %s0 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vfsub.d %v0, %v0, %v1, %vm1 +; CHECK-NEXT: b.l.t (, %s10) + %yins = insertelement <256 x double> undef, double %s1, i32 0 + %i1 = shufflevector <256 x double> %yins, <256 x double> undef, <256 x i32> zeroinitializer + %r0 = call <256 x double> @llvm.vp.fsub.v256f64(<256 x double> %i0, <256 x double> %i1, <256 x i1> %m, i32 %n) + ret <256 x double> %r0 +}