Summary of this patch (text before the first "Index:" line is ignored by patch tools):
  * DAGCombiner.cpp: when the fdiv divisor is a splat, scale the use count by
    VT.getVectorMinNumElements() instead of VT.getVectorNumElements().
  * fdiv-combine.ll: switch to autogenerated CHECK lines and add splat-divisor
    tests for fixed-width vectors and for scalable vectors (attribute set #1
    enables +sve).
NOTE(review): the scalable vector types (<vscale x 4 x float>,
<vscale x 2 x double>, and the matching <vscale x N x i32> shuffle masks) were
restored from the nxv4f32 / nxv2f64 suffixes in the test names, and line
breaks / indentation were restored by convention; confirm against the original
review before applying.

Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14537,7 +14537,7 @@
   unsigned NumElts = 1;
   EVT VT = N->getValueType(0);
   if (VT.isVector() && DAG.isSplatValue(N1))
-    NumElts = VT.getVectorNumElements();
+    NumElts = VT.getVectorMinNumElements();
 
   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
     return SDValue();
Index: llvm/test/CodeGen/AArch64/fdiv-combine.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=aarch64-unknown-unknown < %s | FileCheck %s
 
 ; Following test cases check:
@@ -6,11 +7,13 @@
 ;   recip = 1.0 / D; a * recip; b * recip; c * recip;
 define void @three_fdiv_float(float %D, float %a, float %b, float %c) #0 {
 ; CHECK-LABEL: three_fdiv_float:
-; CHECK: fdiv s
-; CHECK-NOT: fdiv
-; CHECK: fmul
-; CHECK: fmul
-; CHECK: fmul
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov s4, #1.00000000
+; CHECK-NEXT:    fdiv s4, s4, s0
+; CHECK-NEXT:    fmul s0, s1, s4
+; CHECK-NEXT:    fmul s1, s2, s4
+; CHECK-NEXT:    fmul s2, s3, s4
+; CHECK-NEXT:    b foo_3f
   %div = fdiv float %a, %D
   %div1 = fdiv float %b, %D
   %div2 = fdiv float %c, %D
@@ -20,11 +23,13 @@
 
 define void @three_fdiv_double(double %D, double %a, double %b, double %c) #0 {
 ; CHECK-LABEL: three_fdiv_double:
-; CHECK: fdiv d
-; CHECK-NOT: fdiv
-; CHECK: fmul
-; CHECK: fmul
-; CHECK: fmul
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov d4, #1.00000000
+; CHECK-NEXT:    fdiv d4, d4, d0
+; CHECK-NEXT:    fmul d0, d1, d4
+; CHECK-NEXT:    fmul d1, d2, d4
+; CHECK-NEXT:    fmul d2, d3, d4
+; CHECK-NEXT:    b foo_3d
   %div = fdiv double %a, %D
   %div1 = fdiv double %b, %D
   %div2 = fdiv double %c, %D
@@ -34,11 +39,13 @@
 
 define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
 ; CHECK-LABEL: three_fdiv_4xfloat:
-; CHECK: fdiv v
-; CHECK-NOT: fdiv
-; CHECK: fmul
-; CHECK: fmul
-; CHECK: fmul
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov v4.4s, #1.00000000
+; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT:    fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT:    fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT:    fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT:    b foo_3_4xf
   %div = fdiv <4 x float> %a, %D
   %div1 = fdiv <4 x float> %b, %D
   %div2 = fdiv <4 x float> %c, %D
@@ -48,11 +55,13 @@
 
 define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) #0 {
 ; CHECK-LABEL: three_fdiv_2xdouble:
-; CHECK: fdiv v
-; CHECK-NOT: fdiv
-; CHECK: fmul
-; CHECK: fmul
-; CHECK: fmul
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov v4.2d, #1.00000000
+; CHECK-NEXT:    fdiv v4.2d, v4.2d, v0.2d
+; CHECK-NEXT:    fmul v0.2d, v1.2d, v4.2d
+; CHECK-NEXT:    fmul v1.2d, v2.2d, v4.2d
+; CHECK-NEXT:    fmul v2.2d, v3.2d, v4.2d
+; CHECK-NEXT:    b foo_3_2xd
   %div = fdiv <2 x double> %a, %D
   %div1 = fdiv <2 x double> %b, %D
   %div2 = fdiv <2 x double> %c, %D
@@ -64,9 +73,11 @@
 ; calculates a reciprocal.
 define void @two_fdiv_float(float %D, float %a, float %b) #0 {
 ; CHECK-LABEL: two_fdiv_float:
-; CHECK: fdiv s
-; CHECK: fdiv s
-; CHECK-NOT: fmul
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdiv s3, s1, s0
+; CHECK-NEXT:    fdiv s1, s2, s0
+; CHECK-NEXT:    fmov s0, s3
+; CHECK-NEXT:    b foo_2f
   %div = fdiv float %a, %D
   %div1 = fdiv float %b, %D
   tail call void @foo_2f(float %div, float %div1)
@@ -75,20 +86,135 @@
 
 define void @two_fdiv_double(double %D, double %a, double %b) #0 {
 ; CHECK-LABEL: two_fdiv_double:
-; CHECK: fdiv d
-; CHECK: fdiv d
-; CHECK-NOT: fmul
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fdiv d3, d1, d0
+; CHECK-NEXT:    fdiv d1, d2, d0
+; CHECK-NEXT:    fmov d0, d3
+; CHECK-NEXT:    b foo_2d
   %div = fdiv double %a, %D
   %div1 = fdiv double %b, %D
   tail call void @foo_2d(double %div, double %div1)
   ret void
 }
 
+define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 {
+; CHECK-LABEL: splat_three_fdiv_4xfloat:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fmov v4.4s, #1.00000000
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT:    fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT:    fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT:    fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT:    b foo_3_4xf
+  %D.ins = insertelement <4 x float> poison, float %D, i64 0
+  %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
+  %div = fdiv <4 x float> %a, %splat
+  %div1 = fdiv <4 x float> %b, %splat
+  %div2 = fdiv <4 x float> %c, %splat
+  tail call void @foo_3_4xf(<4 x float> %div, <4 x float> %div1, <4 x float> %div2)
+  ret void
+}
+
+define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 {
+; CHECK-LABEL: splat_fdiv_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmov v2.4s, #1.00000000
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT:    dup v0.4s, v0.s[0]
+; CHECK-NEXT:    fdiv v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    fmul v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %D.ins = insertelement <4 x float> poison, float %D, i64 0
+  %splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
+  %div = fdiv <4 x float> %a, %splat
+  ret <4 x float> %div
+}
+
+define <vscale x 4 x float> @splat_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a) #1 {
+; CHECK-LABEL: splat_fdiv_nxv4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    fmov z2.s, #1.00000000
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    fdivr z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT:    fmul z0.s, z1.s, z0.s
+; CHECK-NEXT:    ret
+entry:
+  %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
+  %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %div = fdiv <vscale x 4 x float> %a, %splat
+  ret <vscale x 4 x float> %div
+}
+
+define void @splat_three_fdiv_nxv4f32(float %D, <vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) #1 {
+; CHECK-LABEL: splat_three_fdiv_nxv4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
+; CHECK-NEXT:    fmov z4.s, #1.00000000
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    mov z0.s, s0
+; CHECK-NEXT:    fdiv z4.s, p0/m, z4.s, z0.s
+; CHECK-NEXT:    fmul z0.s, z1.s, z4.s
+; CHECK-NEXT:    fmul z1.s, z2.s, z4.s
+; CHECK-NEXT:    fmul z2.s, z3.s, z4.s
+; CHECK-NEXT:    b foo_3_nxv4f32
+entry:
+  %D.ins = insertelement <vscale x 4 x float> poison, float %D, i64 0
+  %splat = shufflevector <vscale x 4 x float> %D.ins, <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+  %div = fdiv <vscale x 4 x float> %a, %splat
+  %div1 = fdiv <vscale x 4 x float> %b, %splat
+  %div2 = fdiv <vscale x 4 x float> %c, %splat
+  tail call void @foo_3_nxv4f32(<vscale x 4 x float> %div, <vscale x 4 x float> %div1, <vscale x 4 x float> %div2)
+  ret void
+}
+
+define <vscale x 2 x double> @splat_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a) #1 {
+; CHECK-LABEL: splat_fdiv_nxv2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    fdivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    ret
+entry:
+  %D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
+  %splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  %div = fdiv <vscale x 2 x double> %a, %splat
+  ret <vscale x 2 x double> %div
+}
+
+define void @splat_two_fdiv_nxv2f64(double %D, <vscale x 2 x double> %a, <vscale x 2 x double> %b) #1 {
+; CHECK-LABEL: splat_two_fdiv_nxv2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT:    fmov z3.d, #1.00000000
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.d, d0
+; CHECK-NEXT:    fdiv z3.d, p0/m, z3.d, z0.d
+; CHECK-NEXT:    fmul z0.d, z1.d, z3.d
+; CHECK-NEXT:    fmul z1.d, z2.d, z3.d
+; CHECK-NEXT:    b foo_2_nxv2f64
+entry:
+  %D.ins = insertelement <vscale x 2 x double> poison, double %D, i64 0
+  %splat = shufflevector <vscale x 2 x double> %D.ins, <vscale x 2 x double> poison, <vscale x 2 x i32> zeroinitializer
+  %div = fdiv <vscale x 2 x double> %a, %splat
+  %div1 = fdiv <vscale x 2 x double> %b, %splat
+  tail call void @foo_2_nxv2f64(<vscale x 2 x double> %div, <vscale x 2 x double> %div1)
+  ret void
+}
+
 declare void @foo_3f(float, float, float)
 declare void @foo_3d(double, double, double)
 declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
 declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
 declare void @foo_2f(float, float)
 declare void @foo_2d(double, double)
+declare void @foo_3_nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>)
+declare void @foo_2_nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
 
 attributes #0 = { "unsafe-fp-math"="true" }
+attributes #1 = { "unsafe-fp-math"="true" "target-features"="+sve" }