diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -212,6 +212,8 @@
   FCMLEz,
   FCMLTz,
 
+  FCMLA,
+
   // Vector across-lanes addition
   // Only the lower result lane is defined.
   SADDV,
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1656,6 +1656,7 @@
     MAKE_CASE(AArch64ISD::FCMP)
     MAKE_CASE(AArch64ISD::STRICT_FCMP)
     MAKE_CASE(AArch64ISD::STRICT_FCMPE)
+    MAKE_CASE(AArch64ISD::FCMLA)
     MAKE_CASE(AArch64ISD::DUP)
     MAKE_CASE(AArch64ISD::DUPLANE8)
     MAKE_CASE(AArch64ISD::DUPLANE16)
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -231,6 +231,10 @@
 def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
 def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                              SDTCisInt<2>, SDTCisInt<3>]>;
+def SDT_AArch64vecfcmla : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+                                               SDTCisSameAs<1,2>,
+                                               SDTCisSameAs<2,3>,
+                                               SDTCisInt<4>]>;
 def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
 def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                             SDTCisSameAs<0,2>, SDTCisInt<3>]>;
@@ -902,14 +906,16 @@
   def LDAPRX : RCPCLoad<0b11, "ldapr", GPR64>;
 }
 
+def AArch64fcmla : SDNode<"AArch64ISD::FCMLA", SDT_AArch64vecfcmla>;
+
 // v8.3a complex add and multiply-accumulate. No predicate here, that is done
 // inside the multiclass as the FP16 versions need different predicates.
 defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
-                                               "fcmla", null_frag>;
+                                               "fcmla", AArch64fcmla>;
 defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
                                            "fcadd", null_frag>;
 defm FCMLA : SIMDIndexedTiedComplexHSD<1, 0, 1, complexrotateop, "fcmla",
-                                       null_frag>;
+                                       AArch64fcmla>;
 
 let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
   def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),