Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -6597,6 +6597,17 @@ !isa(V1.getOperand(Lane))) return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane)); + // Splat multiply operands to allow selecting this as an (f)mul by element: + // splat (mul X, Y), Lane --> mul (splat X, Lane), (splat Y, Lane) + if (V1.hasOneUse() && + (V1.getOpcode() == ISD::FMUL || V1.getOpcode() == ISD::MUL)) { + SDValue SplatX = DAG.getVectorShuffle(VT, dl, V1.getOperand(0), + DAG.getUNDEF(VT), ShuffleMask); + SDValue SplatY = DAG.getVectorShuffle(VT, dl, V1.getOperand(1), + DAG.getUNDEF(VT), ShuffleMask); + return DAG.getNode(V1.getOpcode(), dl, VT, SplatX, SplatY); + } + // Otherwise, duplicate from the lane of the input vector. unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType()); Index: llvm/test/CodeGen/AArch64/mul_by_elt.ll =================================================================== --- llvm/test/CodeGen/AArch64/mul_by_elt.ll +++ llvm/test/CodeGen/AArch64/mul_by_elt.ll @@ -19,8 +19,7 @@ ; CHECK-LABEL: splat0_after_fmul_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov v1.4s, #3.00000000 -; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v0.4s, v0.s[0] +; CHECK-NEXT: fmul v0.4s, v1.4s, v0.s[0] ; CHECK-NEXT: ret %mul = fmul <4 x float> %a, %splat = shufflevector <4 x float> %mul, <4 x float> undef, <4 x i32> zeroinitializer @@ -44,8 +43,7 @@ ; CHECK-LABEL: splat1_after_fmul_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov v1.2d, #5.00000000 -; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d -; CHECK-NEXT: dup v0.2d, v0.d[1] +; CHECK-NEXT: fmul v0.2d, v1.2d, v0.d[1] ; CHECK-NEXT: ret %mul = fmul <2 x double> %a, %splat = shufflevector <2 x double> %mul, <2 x double> undef, <2 x i32> @@ -57,8 +55,8 @@ define <2 x double> @splat1_before_fmul(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: splat1_before_fmul: ; CHECK: // %bb.0: -; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d ; CHECK-NEXT: dup v0.2d, v0.d[1] +; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[1] ; CHECK-NEXT: ret %splata = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> %splatb = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> @@ -69,8 +67,8 @@ define <2 x double> @splat1_after_fmul(<2 x double> %a, <2 x double> %b) { ; CHECK-LABEL: splat1_after_fmul: ; CHECK: // %bb.0: -; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d ; CHECK-NEXT: dup v0.2d, v0.d[1] +; CHECK-NEXT: fmul v0.2d, v0.2d, v1.d[1] ; CHECK-NEXT: ret %mul = fmul <2 x double> %a, %b %splat = shufflevector <2 x double> %mul, <2 x double> undef, <2 x i32> @@ -94,8 +92,7 @@ ; CHECK-LABEL: splat2_after_mul_constant: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.4s, #3 -; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v0.4s, v0.s[2] +; CHECK-NEXT: mul v0.4s, v1.4s, v0.s[2] ; CHECK-NEXT: ret %mul = mul <4 x i32> %a, %splat = shufflevector <4 x i32> %mul, <4 x i32> undef, <4 x i32> @@ -105,8 +102,8 @@ define <8 x i16> @splat1_before_mul(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: splat1_before_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h ; CHECK-NEXT: dup v0.8h, v0.h[1] +; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[1] ; CHECK-NEXT: ret %splata = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> %splatb = shufflevector <8 x i16> %b, <8 x i16> undef, <8 x i32> @@ -117,11 +114,10 @@ define <8 x i16> @splat1_after_mul(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: splat1_after_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h ; CHECK-NEXT: dup v0.8h, v0.h[1] +; CHECK-NEXT: mul v0.8h, v0.8h, v1.h[1] ; CHECK-NEXT: ret %mul = mul <8 x i16> %a, %b %splat = shufflevector <8 x i16> %mul, <8 x i16> undef, <8 x i32> ret <8 x i16> %splat } -