Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -14536,7 +14536,7 @@ // convert the division into a scalar op, that will likely be much faster. unsigned NumElts = 1; EVT VT = N->getValueType(0); - if (VT.isVector() && DAG.isSplatValue(N1)) + if (VT.isVector() && DAG.isSplatValue(N1) && TLI.isExtractVecEltCheap(VT, 0)) NumElts = VT.getVectorMinNumElements(); if (!MinUses || (N1->use_size() * NumElts) < MinUses) Index: llvm/test/CodeGen/AArch64/fdiv-combine.ll =================================================================== --- llvm/test/CodeGen/AArch64/fdiv-combine.ll +++ llvm/test/CodeGen/AArch64/fdiv-combine.ll @@ -120,11 +120,9 @@ define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 { ; CHECK-LABEL: splat_fdiv_v4f32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov v2.4s, #1.00000000 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: dup v0.4s, v0.s[0] -; CHECK-NEXT: fdiv v0.4s, v2.4s, v0.4s -; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s +; CHECK-NEXT: fdiv v0.4s, v1.4s, v0.4s ; CHECK-NEXT: ret entry: %D.ins = insertelement <4 x float> poison, float %D, i64 0 @@ -137,11 +135,9 @@ ; CHECK-LABEL: splat_fdiv_nxv4f32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 -; CHECK-NEXT: fmov z2.s, #1.00000000 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov z0.s, s0 -; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: fmul z0.s, z1.s, z0.s +; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret entry: %D.ins = insertelement poison, float %D, i64 0 @@ -191,12 +187,12 @@ ; CHECK-LABEL: splat_two_fdiv_nxv2f64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: fmov z3.d, #1.00000000 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z0.d, d0 -; CHECK-NEXT: fdiv z3.d, p0/m, z3.d, z0.d -; CHECK-NEXT: fmul z0.d, z1.d, z3.d -; CHECK-NEXT: fmul z1.d, z2.d, z3.d +; CHECK-NEXT: mov z3.d, d0 +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z3.d +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: fdiv z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: b foo_2_nxv2f64 entry: %D.ins = insertelement poison, double %D, i64 0