diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6280,18 +6280,18 @@
 // On the other hand, there are quite a few valid combinatorial options due to
 // the commutativity of multiplication and the fact that (-x) * y = x * (-y).
 defm : SIMDFPIndexedTiedPatterns<"FMLA",
-           TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)>>;
+           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>; // any_fma: fma or strict_fma
 defm : SIMDFPIndexedTiedPatterns<"FMLA",
-           TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)>>;
+           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>; // same, multiplicands commuted
 
 defm : SIMDFPIndexedTiedPatterns<"FMLS",
-           TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
+           TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; // fneg on $RHS, 2nd operand
 defm : SIMDFPIndexedTiedPatterns<"FMLS",
-           TriOpFrag<(fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
+           TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >; // fneg on $MHS, 2nd operand
 defm : SIMDFPIndexedTiedPatterns<"FMLS",
-           TriOpFrag<(fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
+           TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >; // fneg on $RHS, 1st operand
 defm : SIMDFPIndexedTiedPatterns<"FMLS",
-           TriOpFrag<(fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
+           TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >; // fneg on $MHS, 1st operand
 
 multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
   // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
@@ -6370,22 +6370,22 @@
 }
 
 defm : FMLSIndexedAfterNegPatterns<
-           TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >;
+           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >; // any_fma: fma or strict_fma
 defm : FMLSIndexedAfterNegPatterns<
-           TriOpFrag<(fma node:$MHS, node:$RHS, node:$LHS)> >;
+           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >; // same, multiplicands commuted
 
 defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
 defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;
 
-def : Pat<(v2f32 (fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
+def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))), // any_fmul: fmul or strict_fmul; scalar dup -> lane 0
           (FMULv2i32_indexed V64:$Rn,
             (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
             (i64 0))>;
-def : Pat<(v4f32 (fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
+def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))), // any_fmul: fmul or strict_fmul; scalar dup -> lane 0
           (FMULv4i32_indexed V128:$Rn,
             (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
             (i64 0))>;
-def : Pat<(v2f64 (fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
+def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))), // any_fmul: fmul or strict_fmul; scalar dup -> lane 0
           (FMULv2i64_indexed V128:$Rn,
             (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
             (i64 0))>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
--- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll
@@ -845,6 +845,90 @@
   ret <2 x double> %fmla1
 }
 
+define <2 x float> @fmls_indexed_2s_strict(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone ssp strictfp {
+; CHECK-LABEL: fmls_indexed_2s_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    fmls.2s v0, v2, v1[0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = fneg <2 x float> %c ; negated multiplicand; expected to fold into fmls
+  %lane = shufflevector <2 x float> %b, <2 x float> undef, <2 x i32> zeroinitializer ; splat lane 0 of %b
+  %fmls1 = tail call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %0, <2 x float> %lane, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret <2 x float> %fmls1
+}
+
+define <4 x float> @fmls_indexed_4s_strict(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone ssp strictfp {
+; CHECK-LABEL: fmls_indexed_4s_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmls.4s v0, v2, v1[0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = fneg <4 x float> %c ; negated multiplicand; expected to fold into fmls
+  %lane = shufflevector <4 x float> %b, <4 x float> undef, <4 x i32> zeroinitializer ; splat lane 0 of %b
+  %fmls1 = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %0, <4 x float> %lane, <4 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret <4 x float> %fmls1
+}
+
+define <2 x double> @fmls_indexed_2d_strict(<2 x double> %a, <2 x double> %b, <2 x double> %c) nounwind readnone ssp strictfp {
+; CHECK-LABEL: fmls_indexed_2d_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fmls.2d v0, v2, v1[0]
+; CHECK-NEXT:    ret
+entry:
+  %0 = fneg <2 x double> %c ; negated multiplicand; expected to fold into fmls
+  %lane = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> zeroinitializer ; splat lane 0 of %b
+  %fmls1 = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %0, <2 x double> %lane, <2 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret <2 x double> %fmls1
+}
+
+define <2 x float> @fmla_indexed_scalar_2s_strict(<2 x float> %a, <2 x float> %b, float %c) nounwind readnone ssp strictfp {
+; CHECK-LABEL: fmla_indexed_scalar_2s_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-NEXT:    fmla.2s v0, v1, v2[0]
+; CHECK-NEXT:    ret
+entry:
+  %v1 = insertelement <2 x float> undef, float %c, i32 0 ; build a splat of scalar %c ...
+  %v2 = insertelement <2 x float> %v1, float %c, i32 1 ; ... one lane at a time
+  %fmla1 = tail call <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float> %v2, <2 x float> %b, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret <2 x float> %fmla1
+}
+
+define <4 x float> @fmla_indexed_scalar_4s_strict(<4 x float> %a, <4 x float> %b, float %c) nounwind readnone ssp strictfp {
+; CHECK-LABEL: fmla_indexed_scalar_4s_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $s2 killed $s2 def $q2
+; CHECK-NEXT:    fmla.4s v0, v1, v2[0]
+; CHECK-NEXT:    ret
+entry:
+  %v1 = insertelement <4 x float> undef, float %c, i32 0 ; build a splat of scalar %c ...
+  %v2 = insertelement <4 x float> %v1, float %c, i32 1
+  %v3 = insertelement <4 x float> %v2, float %c, i32 2
+  %v4 = insertelement <4 x float> %v3, float %c, i32 3 ; ... all four lanes now hold %c
+  %fmla1 = tail call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %v4, <4 x float> %b, <4 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret <4 x float> %fmla1
+}
+
+define <2 x double> @fmla_indexed_scalar_2d_strict(<2 x double> %a, <2 x double> %b, double %c) nounwind readnone ssp strictfp {
+; CHECK-LABEL: fmla_indexed_scalar_2d_strict:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT:    fmla.2d v0, v1, v2[0]
+; CHECK-NEXT:    ret
+entry:
+  %v1 = insertelement <2 x double> undef, double %c, i32 0 ; build a splat of scalar %c ...
+  %v2 = insertelement <2 x double> %v1, double %c, i32 1 ; ... one lane at a time
+  %fmla1 = tail call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %v2, <2 x double> %b, <2 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret <2 x double> %fmla1
+}
+
+attributes #0 = { strictfp }
+
+declare <2 x float> @llvm.experimental.constrained.fma.v2f32(<2 x float>, <2 x float>, <2 x float>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
+
 define <4 x i16> @mul_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ; CHECK-LABEL: mul_4h:
 ; CHECK:       // %bb.0:
diff --git a/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll b/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
--- a/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
+++ b/llvm/test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll
@@ -1,7 +1,11 @@
 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
 
+attributes #0 = { strictfp }
+
 declare float @llvm.fma.f32(float, float, float)
 declare double @llvm.fma.f64(double, double, double)
+declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
+declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
 
 define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) {
   ; CHECK-LABEL: test_fmla_ss4S
@@ -106,3 +110,105 @@
   ret double %tmp3
 }
 
+define float @test_fmla_ss4S_strict(float %a, float %b, <4 x float> %v) #0 {
+  ; CHECK-LABEL: test_fmla_ss4S_strict
+  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3 ; lane 3 of %v is the by-element multiplicand
+  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %tmp2
+}
+
+define float @test_fmla_ss4S_swap_strict(float %a, float %b, <4 x float> %v) #0 {
+  ; CHECK-LABEL: test_fmla_ss4S_swap_strict
+  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3 ; lane 3 of %v, passed as the first fma operand here
+  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %a, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ; NOTE(review): %a is both multiplicand and addend, %b is unused - confirm intended
+  ret float %tmp2
+}
+
+define float @test_fmla_ss2S_strict(float %a, float %b, <2 x float> %v) #0 {
+  ; CHECK-LABEL: test_fmla_ss2S_strict
+  ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  %tmp1 = extractelement <2 x float> %v, i32 1 ; lane 1 of the 64-bit vector is the by-element multiplicand
+  %tmp2 = call float @llvm.experimental.constrained.fma.f32(float %b, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret float %tmp2
+}
+
+define double @test_fmla_ddD_strict(double %a, double %b, <1 x double> %v) #0 {
+  ; CHECK-LABEL: test_fmla_ddD_strict
+  ; CHECK: {{fmla d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmadd d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
+  %tmp1 = extractelement <1 x double> %v, i32 0 ; only element of a <1 x double>; either fmla or fmadd is accepted above
+  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D_strict(double %a, double %b, <2 x double> %v) #0 {
+  ; CHECK-LABEL: test_fmla_dd2D_strict
+  ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1 ; lane 1 of %v is the by-element multiplicand
+  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %b, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %tmp2
+}
+
+define double @test_fmla_dd2D_swap_strict(double %a, double %b, <2 x double> %v) #0 {
+  ; CHECK-LABEL: test_fmla_dd2D_swap_strict
+  ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1 ; lane 1 of %v, passed as the first fma operand (commuted form)
+  %tmp2 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %b, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
+  ret double %tmp2
+}
+
+define float @test_fmls_ss4S_strict(float %a, float %b, <4 x float> %v) #0 {
+  ; CHECK-LABEL: test_fmls_ss4S_strict
+  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3 ; lane 3 of %v
+  %tmp2 = fneg float %tmp1 ; negated copy; the fneg is expected to fold into fmls
+  %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp2, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ; computes (-v[3] * v[3]) + %a; %b is unused
+  ret float %tmp3
+}
+
+define float @test_fmls_ss4S_swap_strict(float %a, float %b, <4 x float> %v) #0 {
+  ; CHECK-LABEL: test_fmls_ss4S_swap_strict
+  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  %tmp1 = extractelement <4 x float> %v, i32 3 ; lane 3 of %v
+  %tmp2 = fneg float %tmp1 ; negated copy; the fneg is expected to fold into fmls
+  %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp1, float %tmp2, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ; negated value is the second operand here; %b is unused
+  ret float %tmp3
+}
+
+define float @test_fmls_ss2S_strict(float %a, float %b, <2 x float> %v) #0 {
+  ; CHECK-LABEL: test_fmls_ss2S_strict
+  ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  %tmp1 = extractelement <2 x float> %v, i32 1 ; lane 1 of the 64-bit vector
+  %tmp2 = fneg float %tmp1 ; negated copy; the fneg is expected to fold into fmls
+  %tmp3 = call float @llvm.experimental.constrained.fma.f32(float %tmp2, float %tmp1, float %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ; computes (-v[1] * v[1]) + %a; %b is unused
+  ret float %tmp3
+}
+
+define double @test_fmls_ddD_strict(double %a, double %b, <1 x double> %v) #0 {
+  ; CHECK-LABEL: test_fmls_ddD_strict
+  ; CHECK: {{fmls d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmsub d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
+  %tmp1 = extractelement <1 x double> %v, i32 0 ; only element of a <1 x double>; either fmls or fmsub is accepted above
+  %tmp2 = fneg double %tmp1 ; negated copy; the fneg is expected to fold into the multiply-subtract
+  %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp2, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ; computes (-v[0] * v[0]) + %a; %b is unused
+  ret double %tmp3
+}
+
+define double @test_fmls_dd2D_strict(double %a, double %b, <2 x double> %v) #0 {
+  ; CHECK-LABEL: test_fmls_dd2D_strict
+  ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1 ; lane 1 of %v
+  %tmp2 = fneg double %tmp1 ; negated copy; the fneg is expected to fold into fmls
+  %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp2, double %tmp1, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ; computes (-v[1] * v[1]) + %a; %b is unused
+  ret double %tmp3
+}
+
+define double @test_fmls_dd2D_swap_strict(double %a, double %b, <2 x double> %v) #0 {
+  ; CHECK-LABEL: test_fmls_dd2D_swap_strict
+  ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
+  %tmp1 = extractelement <2 x double> %v, i32 1 ; lane 1 of %v
+  %tmp2 = fneg double %tmp1 ; negated copy; the fneg is expected to fold into fmls
+  %tmp3 = call double @llvm.experimental.constrained.fma.f64(double %tmp1, double %tmp2, double %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0 ; negated value is the second operand here; %b is unused
+  ret double %tmp3
+}
+