diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -788,6 +788,16 @@
 
   bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
 
+  bool isComplexDeinterleavingSupported() const override;
+  bool isComplexDeinterleavingOperationSupported(
+      ComplexDeinterleavingOperation Operation, Type *Ty) const override;
+  /// Returns the NEON vcmla/vcadd call for the operation, or nullptr if none.
+  Value *
+  createComplexDeinterleavingIR(Instruction *I,
+                                ComplexDeinterleavingOperation OperationType,
+                                unsigned Rotation, Value *InputA, Value *InputB,
+                                Value *Accumulator = nullptr) const override;
+
   bool hasBitPreservingFPLogic(EVT VT) const override {
     // FIXME: Is this always true? It should be true for vectors at least.
     return VT == MVT::f32 || VT == MVT::f64;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -21409,3 +21409,88 @@
     unsigned Opc, LLT Ty1, LLT Ty2) const {
   return Ty1 == Ty2 && (Ty1 == LLT::scalar(32) || Ty1 == LLT::scalar(64));
 }
+bool AArch64TargetLowering::isComplexDeinterleavingSupported() const {
+  return Subtarget->hasNEON() && Subtarget->hasComplxNum(); // both required
+}
+// Accepts <4|8 x half> (needs +fullfp16), <2|4 x float> and <2 x double>;
+// every other type, including scalable vectors, is rejected.
+bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
+    ComplexDeinterleavingOperation Operation, Type *Ty) const {
+  const auto *FixedTy = dyn_cast<FixedVectorType>(Ty);
+  if (!FixedTy)
+    return false;
+  const Type *EltTy = FixedTy->getScalarType();
+  const unsigned Lanes = FixedTy->getNumElements();
+  if (EltTy->isDoubleTy())
+    return Lanes == 2;
+  if (EltTy->isFloatTy())
+    return Lanes == 2 || Lanes == 4;
+  const bool IsFP16 = EltTy->isHalfTy() && Subtarget->hasFullFP16();
+  return IsFP16 && (Lanes == 4 || Lanes == 8);
+}
+
+/// Emit the AArch64 NEON intrinsic call that implements one complex-arithmetic
+/// operation on interleaved (real, imaginary) vectors.
+///
+/// The call is created with an IRBuilder positioned at \p I and has the same
+/// type as \p InputA. Nothing is emitted when the request cannot be lowered.
+///
+/// \param I Instruction the builder is positioned at.
+/// \param OperationType CMulPartial selects a vcmla intrinsic and CAdd a vcadd
+///        intrinsic; any other operation is rejected.
+/// \param Rotation Rotation in degrees. CMulPartial accepts 0, 90, 180 and
+///        270; CAdd accepts only 90 and 270.
+/// \param InputA First operand; its type is the overloaded intrinsic type.
+/// \param InputB Second operand.
+/// \param Accumulator Value a partial multiply accumulates into. When null, a
+///        zero constant of \p InputA's type is used. Not used for CAdd.
+/// \returns The created intrinsic call, or nullptr when no intrinsic matches
+///          the operation/rotation pair.
+Value *AArch64TargetLowering::createComplexDeinterleavingIR(
+    Instruction *I, ComplexDeinterleavingOperation OperationType,
+    unsigned Rotation, Value *InputA, Value *InputB, Value *Accumulator) const {
+  // The vcmla/vcadd intrinsics are overloaded on the operand vector type.
+  auto *Ty = InputA->getType();
+  IRBuilder<> B(I);
+
+  if (OperationType == ComplexDeinterleavingOperation::CMulPartial) {
+    // Indexed by Rotation / 90.
+    static const Intrinsic::ID IdMap[4] = {
+        Intrinsic::aarch64_neon_vcmla_rot0,
+        Intrinsic::aarch64_neon_vcmla_rot90,
+        Intrinsic::aarch64_neon_vcmla_rot180,
+        Intrinsic::aarch64_neon_vcmla_rot270,
+    };
+
+    // Only exact quarter turns map onto an intrinsic. Checking before indexing
+    // keeps a rotation >= 360 from reading past the end of IdMap and stops a
+    // value such as 45 from being silently truncated to rotation 0.
+    if (Rotation % 90 != 0 || Rotation >= 360)
+      return nullptr;
+
+    // A multiply without an incoming accumulator accumulates into zero.
+    if (Accumulator == nullptr)
+      Accumulator = ConstantFP::get(Ty, 0);
+
+    // Operand order is (accumulator, InputB, InputA).
+    return B.CreateIntrinsic(IdMap[Rotation / 90], Ty,
+                             {Accumulator, InputB, InputA});
+  }
+
+  if (OperationType == ComplexDeinterleavingOperation::CAdd) {
+    // vcadd only exists for the 90 and 270 degree rotations.
+    Intrinsic::ID IntId = Intrinsic::not_intrinsic;
+    if (Rotation == 90)
+      IntId = Intrinsic::aarch64_neon_vcadd_rot90;
+    else if (Rotation == 270)
+      IntId = Intrinsic::aarch64_neon_vcadd_rot270;
+
+    if (IntId == Intrinsic::not_intrinsic)
+      return nullptr;
+
+    return B.CreateIntrinsic(IntId, Ty, {InputA, InputB});
+  }
+
+  // No other complex operation has an AArch64 lowering here.
+  return nullptr;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -568,6 +568,10 @@
   addPass(createAArch64StackTaggingPass(
       /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
 
+  // Match complex arithmetic patterns and lower them to target intrinsics.
+  if (TM->getOptLevel() >= CodeGenOpt::Default)
+    addPass(createComplexDeinterleavingPass(TM));
+
   // Match interleaved memory accesses to ldN/stN intrinsics.
   if (TM->getOptLevel() != CodeGenOpt::None) {
     addPass(createInterleavedLoadCombinePass());
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -14,6 +14,7 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/CodeGen/BasicTTIImpl.h"
+#include "llvm/CodeGen/ComplexDeinterleavingPass.h"
 #include "llvm/CodeGen/CostTable.h"
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/IR/IntrinsicInst.h"
diff --git a/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f16-add.ll b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f16-add.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f16-add.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
+
+target triple = "aarch64-arm-none-eabi"
+
+; Every function computes b + i*a on interleaved (real, imag) half lanes.
+; v4f16 and v8f16 select one FCADD #90; v2f16, v16f16 and v32f16 are not matched.
+define <2 x half> @complex_add_v2f16(<2 x half> %a, <2 x half> %b) {
+; CHECK-LABEL: complex_add_v2f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h2, v0.h[1]
+; CHECK-NEXT:    mov h3, v1.h[1]
+; CHECK-NEXT:    fsub h1, h1, h2
+; CHECK-NEXT:    fadd h0, h3, h0
+; CHECK-NEXT:    mov v1.h[1], v0.h[0]
+; CHECK-NEXT:    fmov d0, d1
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 0>
+  %a.imag = shufflevector <2 x half> %a, <2 x half> zeroinitializer, <1 x i32> <i32 1>
+  %b.real = shufflevector <2 x half> %b, <2 x half> zeroinitializer, <1 x i32> <i32 0>
+  %b.imag = shufflevector <2 x half> %b, <2 x half> zeroinitializer, <1 x i32> <i32 1>
+  %0 = fsub fast <1 x half> %b.real, %a.imag
+  %1 = fadd fast <1 x half> %b.imag, %a.real
+  %interleaved.vec = shufflevector <1 x half> %0, <1 x half> %1, <2 x i32> <i32 0, i32 1>
+  ret <2 x half> %interleaved.vec
+}
+define <4 x half> @complex_add_v4f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: complex_add_v4f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcadd v0.4h, v1.4h, v0.4h, #90
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <4 x half> %a, <4 x half> zeroinitializer, <2 x i32> <i32 0, i32 2>
+  %a.imag = shufflevector <4 x half> %a, <4 x half> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  %b.real = shufflevector <4 x half> %b, <4 x half> zeroinitializer, <2 x i32> <i32 0, i32 2>
+  %b.imag = shufflevector <4 x half> %b, <4 x half> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  %0 = fsub fast <2 x half> %b.real, %a.imag
+  %1 = fadd fast <2 x half> %b.imag, %a.real
+  %interleaved.vec = shufflevector <2 x half> %0, <2 x half> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x half> %interleaved.vec
+}
+define <8 x half> @complex_add_v8f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: complex_add_v8f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcadd v0.8h, v1.8h, v0.8h, #90
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <8 x half> %a, <8 x half> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %a.imag = shufflevector <8 x half> %a, <8 x half> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %b.real = shufflevector <8 x half> %b, <8 x half> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %b.imag = shufflevector <8 x half> %b, <8 x half> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %0 = fsub fast <4 x half> %b.real, %a.imag
+  %1 = fadd fast <4 x half> %b.imag, %a.real
+  %interleaved.vec = shufflevector <4 x half> %0, <4 x half> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  ret <8 x half> %interleaved.vec
+}
+define <16 x half> @complex_add_v16f16(<16 x half> %a, <16 x half> %b) {
+; CHECK-LABEL: complex_add_v16f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 v4.8h, v2.8h, v3.8h
+; CHECK-NEXT:    uzp1 v5.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp2 v1.8h, v2.8h, v3.8h
+; CHECK-NEXT:    fsub v2.8h, v4.8h, v0.8h
+; CHECK-NEXT:    fadd v1.8h, v1.8h, v5.8h
+; CHECK-NEXT:    zip1 v0.8h, v2.8h, v1.8h
+; CHECK-NEXT:    zip2 v1.8h, v2.8h, v1.8h
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <16 x half> %a, <16 x half> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %a.imag = shufflevector <16 x half> %a, <16 x half> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %b.real = shufflevector <16 x half> %b, <16 x half> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %b.imag = shufflevector <16 x half> %b, <16 x half> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %0 = fsub fast <8 x half> %b.real, %a.imag
+  %1 = fadd fast <8 x half> %b.imag, %a.real
+  %interleaved.vec = shufflevector <8 x half> %0, <8 x half> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <16 x half> %interleaved.vec
+}
+define <32 x half> @complex_add_v32f16(<32 x half> %a, <32 x half> %b) {
+; CHECK-LABEL: complex_add_v32f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 v16.8h, v4.8h, v5.8h
+; CHECK-NEXT:    uzp1 v17.8h, v2.8h, v3.8h
+; CHECK-NEXT:    uzp1 v18.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp2 v1.8h, v2.8h, v3.8h
+; CHECK-NEXT:    uzp2 v2.8h, v4.8h, v5.8h
+; CHECK-NEXT:    uzp1 v3.8h, v6.8h, v7.8h
+; CHECK-NEXT:    uzp2 v4.8h, v6.8h, v7.8h
+; CHECK-NEXT:    fsub v5.8h, v16.8h, v0.8h
+; CHECK-NEXT:    fadd v2.8h, v2.8h, v18.8h
+; CHECK-NEXT:    fsub v3.8h, v3.8h, v1.8h
+; CHECK-NEXT:    fadd v4.8h, v4.8h, v17.8h
+; CHECK-NEXT:    zip1 v0.8h, v5.8h, v2.8h
+; CHECK-NEXT:    zip2 v1.8h, v5.8h, v2.8h
+; CHECK-NEXT:    zip1 v2.8h, v3.8h, v4.8h
+; CHECK-NEXT:    zip2 v3.8h, v3.8h, v4.8h
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <32 x half> %a, <32 x half> zeroinitializer, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %a.imag = shufflevector <32 x half> %a, <32 x half> zeroinitializer, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  %b.real = shufflevector <32 x half> %b, <32 x half> zeroinitializer, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %b.imag = shufflevector <32 x half> %b, <32 x half> zeroinitializer, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  %0 = fsub fast <16 x half> %b.real, %a.imag
+  %1 = fadd fast <16 x half> %b.imag, %a.real
+  %interleaved.vec = shufflevector <16 x half> %0, <16 x half> %1, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <32 x half> %interleaved.vec
+}
diff --git a/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f16-mul.ll b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f16-mul.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f16-mul.ll
@@ -0,0 +1,150 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
+
+target triple = "aarch64-arm-none-eabi"
+; Every function computes the complex product a * b on interleaved (real, imag) half lanes; v4f16 and v8f16 select an FCMLA #90 / #0 pair on a zeroed accumulator, other widths are not matched.
+define <2 x half> @complex_mul_v2f16(<2 x half> %a, <2 x half> %b) {
+; CHECK-LABEL: complex_mul_v2f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT:    mov h3, v0.h[1]
+; CHECK-NEXT:    mov h2, v1.h[1]
+; CHECK-NEXT:    fmul h4, h2, v0.h[0]
+; CHECK-NEXT:    fnmul h2, h3, h2
+; CHECK-NEXT:    fmla h4, h3, v1.h[0]
+; CHECK-NEXT:    fmla h2, h0, v1.h[0]
+; CHECK-NEXT:    mov v2.h[1], v4.h[0]
+; CHECK-NEXT:    fmov d0, d2
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <2 x half> %a, <2 x half> poison, <1 x i32> <i32 0>
+  %a.imag = shufflevector <2 x half> %a, <2 x half> poison, <1 x i32> <i32 1>
+  %b.real = shufflevector <2 x half> %b, <2 x half> poison, <1 x i32> <i32 0>
+  %b.imag = shufflevector <2 x half> %b, <2 x half> poison, <1 x i32> <i32 1>
+  %0 = fmul fast <1 x half> %b.imag, %a.real
+  %1 = fmul fast <1 x half> %b.real, %a.imag
+  %2 = fadd fast <1 x half> %1, %0
+  %3 = fmul fast <1 x half> %b.real, %a.real
+  %4 = fmul fast <1 x half> %a.imag, %b.imag
+  %5 = fsub fast <1 x half> %3, %4
+  %interleaved.vec = shufflevector <1 x half> %5, <1 x half> %2, <2 x i32> <i32 0, i32 1>
+  ret <2 x half> %interleaved.vec
+}
+
+define <4 x half> @complex_mul_v4f16(<4 x half> %a, <4 x half> %b) {
+; CHECK-LABEL: complex_mul_v4f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d2, #0000000000000000
+; CHECK-NEXT:    fcmla v2.4h, v0.4h, v1.4h, #90
+; CHECK-NEXT:    fcmla v2.4h, v0.4h, v1.4h, #0
+; CHECK-NEXT:    fmov d0, d2
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <4 x half> %a, <4 x half> poison, <2 x i32> <i32 0, i32 2>
+  %a.imag = shufflevector <4 x half> %a, <4 x half> poison, <2 x i32> <i32 1, i32 3>
+  %b.real = shufflevector <4 x half> %b, <4 x half> poison, <2 x i32> <i32 0, i32 2>
+  %b.imag = shufflevector <4 x half> %b, <4 x half> poison, <2 x i32> <i32 1, i32 3>
+  %0 = fmul fast <2 x half> %b.imag, %a.real
+  %1 = fmul fast <2 x half> %b.real, %a.imag
+  %2 = fadd fast <2 x half> %1, %0
+  %3 = fmul fast <2 x half> %b.real, %a.real
+  %4 = fmul fast <2 x half> %a.imag, %b.imag
+  %5 = fsub fast <2 x half> %3, %4
+  %interleaved.vec = shufflevector <2 x half> %5, <2 x half> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x half> %interleaved.vec
+}
+
+define <8 x half> @complex_mul_v8f16(<8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: complex_mul_v8f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    fcmla v2.8h, v0.8h, v1.8h, #90
+; CHECK-NEXT:    fcmla v2.8h, v0.8h, v1.8h, #0
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %a.imag = shufflevector <8 x half> %a, <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %b.real = shufflevector <8 x half> %b, <8 x half> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %b.imag = shufflevector <8 x half> %b, <8 x half> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %0 = fmul fast <4 x half> %b.imag, %a.real
+  %1 = fmul fast <4 x half> %b.real, %a.imag
+  %2 = fadd fast <4 x half> %1, %0
+  %3 = fmul fast <4 x half> %b.real, %a.real
+  %4 = fmul fast <4 x half> %a.imag, %b.imag
+  %5 = fsub fast <4 x half> %3, %4
+  %interleaved.vec = shufflevector <4 x half> %5, <4 x half> %2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  ret <8 x half> %interleaved.vec
+}
+
+define <16 x half> @complex_mul_v16f16(<16 x half> %a, <16 x half> %b) {
+; CHECK-LABEL: complex_mul_v16f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp2 v4.8h, v2.8h, v3.8h
+; CHECK-NEXT:    uzp2 v5.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp1 v2.8h, v2.8h, v3.8h
+; CHECK-NEXT:    fmul v1.8h, v5.8h, v4.8h
+; CHECK-NEXT:    fmul v3.8h, v4.8h, v0.8h
+; CHECK-NEXT:    fneg v1.8h, v1.8h
+; CHECK-NEXT:    fmla v3.8h, v5.8h, v2.8h
+; CHECK-NEXT:    fmla v1.8h, v0.8h, v2.8h
+; CHECK-NEXT:    zip1 v0.8h, v1.8h, v3.8h
+; CHECK-NEXT:    zip2 v1.8h, v1.8h, v3.8h
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <16 x half> %a, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %a.imag = shufflevector <16 x half> %a, <16 x half> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %b.real = shufflevector <16 x half> %b, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %b.imag = shufflevector <16 x half> %b, <16 x half> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %0 = fmul fast <8 x half> %b.imag, %a.real
+  %1 = fmul fast <8 x half> %b.real, %a.imag
+  %2 = fadd fast <8 x half> %1, %0
+  %3 = fmul fast <8 x half> %b.real, %a.real
+  %4 = fmul fast <8 x half> %a.imag, %b.imag
+  %5 = fsub fast <8 x half> %3, %4
+  %interleaved.vec = shufflevector <8 x half> %5, <8 x half> %2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <16 x half> %interleaved.vec
+}
+
+define <32 x half> @complex_mul_v32f16(<32 x half> %a, <32 x half> %b) {
+; CHECK-LABEL: complex_mul_v32f16:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp2 v16.8h, v4.8h, v5.8h
+; CHECK-NEXT:    uzp1 v17.8h, v2.8h, v3.8h
+; CHECK-NEXT:    uzp1 v18.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp2 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    uzp2 v1.8h, v2.8h, v3.8h
+; CHECK-NEXT:    uzp2 v2.8h, v6.8h, v7.8h
+; CHECK-NEXT:    uzp1 v3.8h, v4.8h, v5.8h
+; CHECK-NEXT:    fmul v4.8h, v0.8h, v16.8h
+; CHECK-NEXT:    uzp1 v5.8h, v6.8h, v7.8h
+; CHECK-NEXT:    fmul v6.8h, v1.8h, v2.8h
+; CHECK-NEXT:    fmul v7.8h, v16.8h, v18.8h
+; CHECK-NEXT:    fneg v4.8h, v4.8h
+; CHECK-NEXT:    fmul v16.8h, v2.8h, v17.8h
+; CHECK-NEXT:    fneg v6.8h, v6.8h
+; CHECK-NEXT:    fmla v7.8h, v0.8h, v3.8h
+; CHECK-NEXT:    fmla v4.8h, v18.8h, v3.8h
+; CHECK-NEXT:    fmla v16.8h, v1.8h, v5.8h
+; CHECK-NEXT:    fmla v6.8h, v17.8h, v5.8h
+; CHECK-NEXT:    zip1 v0.8h, v4.8h, v7.8h
+; CHECK-NEXT:    zip2 v1.8h, v4.8h, v7.8h
+; CHECK-NEXT:    zip1 v2.8h, v6.8h, v16.8h
+; CHECK-NEXT:    zip2 v3.8h, v6.8h, v16.8h
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <32 x half> %a, <32 x half> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %a.imag = shufflevector <32 x half> %a, <32 x half> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  %b.real = shufflevector <32 x half> %b, <32 x half> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
+  %b.imag = shufflevector <32 x half> %b, <32 x half> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
+  %0 = fmul fast <16 x half> %b.imag, %a.real
+  %1 = fmul fast <16 x half> %b.real, %a.imag
+  %2 = fadd fast <16 x half> %1, %0
+  %3 = fmul fast <16 x half> %b.real, %a.real
+  %4 = fmul fast <16 x half> %a.imag, %b.imag
+  %5 = fsub fast <16 x half> %3, %4
+  %interleaved.vec = shufflevector <16 x half> %5, <16 x half> %2, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
+  ret <32 x half> %interleaved.vec
+}
diff --git a/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f32-add.ll b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f32-add.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f32-add.ll
@@ -0,0 +1,89 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
+
+target triple = "aarch64-arm-none-eabi"
+
+; Every function computes b + i*a on interleaved (real, imag) float lanes.
+; v2f32 and v4f32 select one FCADD #90; v8f32 and v16f32 are not matched.
+define <2 x float> @complex_add_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: complex_add_v2f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcadd v0.2s, v1.2s, v0.2s, #90
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <2 x float> %a, <2 x float> zeroinitializer, <1 x i32> <i32 0>
+  %a.imag = shufflevector <2 x float> %a, <2 x float> zeroinitializer, <1 x i32> <i32 1>
+  %b.real = shufflevector <2 x float> %b, <2 x float> zeroinitializer, <1 x i32> <i32 0>
+  %b.imag = shufflevector <2 x float> %b, <2 x float> zeroinitializer, <1 x i32> <i32 1>
+  %0 = fsub fast <1 x float> %b.real, %a.imag
+  %1 = fadd fast <1 x float> %b.imag, %a.real
+  %interleaved.vec = shufflevector <1 x float> %0, <1 x float> %1, <2 x i32> <i32 0, i32 1>
+  ret <2 x float> %interleaved.vec
+}
+define <4 x float> @complex_add_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: complex_add_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcadd v0.4s, v1.4s, v0.4s, #90
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <2 x i32> <i32 0, i32 2>
+  %a.imag = shufflevector <4 x float> %a, <4 x float> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  %b.real = shufflevector <4 x float> %b, <4 x float> zeroinitializer, <2 x i32> <i32 0, i32 2>
+  %b.imag = shufflevector <4 x float> %b, <4 x float> zeroinitializer, <2 x i32> <i32 1, i32 3>
+  %0 = fsub fast <2 x float> %b.real, %a.imag
+  %1 = fadd fast <2 x float> %b.imag, %a.real
+  %interleaved.vec = shufflevector <2 x float> %0, <2 x float> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x float> %interleaved.vec
+}
+define <8 x float> @complex_add_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: complex_add_v8f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 v4.4s, v2.4s, v3.4s
+; CHECK-NEXT:    uzp1 v5.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp2 v1.4s, v2.4s, v3.4s
+; CHECK-NEXT:    fsub v2.4s, v4.4s, v0.4s
+; CHECK-NEXT:    fadd v1.4s, v1.4s, v5.4s
+; CHECK-NEXT:    zip1 v0.4s, v2.4s, v1.4s
+; CHECK-NEXT:    zip2 v1.4s, v2.4s, v1.4s
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %a.imag = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %b.real = shufflevector <8 x float> %b, <8 x float> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %b.imag = shufflevector <8 x float> %b, <8 x float> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %0 = fsub fast <4 x float> %b.real, %a.imag
+  %1 = fadd fast <4 x float> %b.imag, %a.real
+  %interleaved.vec = shufflevector <4 x float> %0, <4 x float> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  ret <8 x float> %interleaved.vec
+}
+define <16 x float> @complex_add_v16f32(<16 x float> %a, <16 x float> %b) {
+; CHECK-LABEL: complex_add_v16f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp1 v16.4s, v4.4s, v5.4s
+; CHECK-NEXT:    uzp1 v17.4s, v2.4s, v3.4s
+; CHECK-NEXT:    uzp1 v18.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp2 v1.4s, v2.4s, v3.4s
+; CHECK-NEXT:    uzp2 v2.4s, v4.4s, v5.4s
+; CHECK-NEXT:    uzp1 v3.4s, v6.4s, v7.4s
+; CHECK-NEXT:    uzp2 v4.4s, v6.4s, v7.4s
+; CHECK-NEXT:    fsub v5.4s, v16.4s, v0.4s
+; CHECK-NEXT:    fadd v2.4s, v2.4s, v18.4s
+; CHECK-NEXT:    fsub v3.4s, v3.4s, v1.4s
+; CHECK-NEXT:    fadd v4.4s, v4.4s, v17.4s
+; CHECK-NEXT:    zip1 v0.4s, v5.4s, v2.4s
+; CHECK-NEXT:    zip2 v1.4s, v5.4s, v2.4s
+; CHECK-NEXT:    zip1 v2.4s, v3.4s, v4.4s
+; CHECK-NEXT:    zip2 v3.4s, v3.4s, v4.4s
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %a.imag = shufflevector <16 x float> %a, <16 x float> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %b.real = shufflevector <16 x float> %b, <16 x float> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %b.imag = shufflevector <16 x float> %b, <16 x float> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %0 = fsub fast <8 x float> %b.real, %a.imag
+  %1 = fadd fast <8 x float> %b.imag, %a.real
+  %interleaved.vec = shufflevector <8 x float> %0, <8 x float> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <16 x float> %interleaved.vec
+}
diff --git a/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f32-mul.ll b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f32-mul.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f32-mul.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
+
+target triple = "aarch64-arm-none-eabi"
+; Every function computes the complex product a * b on interleaved (real, imag) float lanes; v2f32 and v4f32 select an FCMLA #90 / #0 pair on a zeroed accumulator, other widths are not matched.
+define <2 x float> @complex_mul_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: complex_mul_v2f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi d2, #0000000000000000
+; CHECK-NEXT:    fcmla v2.2s, v0.2s, v1.2s, #90
+; CHECK-NEXT:    fcmla v2.2s, v0.2s, v1.2s, #0
+; CHECK-NEXT:    fmov d0, d2
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 0>
+  %a.imag = shufflevector <2 x float> %a, <2 x float> poison, <1 x i32> <i32 1>
+  %b.real = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 0>
+  %b.imag = shufflevector <2 x float> %b, <2 x float> poison, <1 x i32> <i32 1>
+  %0 = fmul fast <1 x float> %b.imag, %a.real
+  %1 = fmul fast <1 x float> %b.real, %a.imag
+  %2 = fadd fast <1 x float> %1, %0
+  %3 = fmul fast <1 x float> %b.real, %a.real
+  %4 = fmul fast <1 x float> %a.imag, %b.imag
+  %5 = fsub fast <1 x float> %3, %4
+  %interleaved.vec = shufflevector <1 x float> %5, <1 x float> %2, <2 x i32> <i32 0, i32 1>
+  ret <2 x float> %interleaved.vec
+}
+
+define <4 x float> @complex_mul_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: complex_mul_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #90
+; CHECK-NEXT:    fcmla v2.4s, v0.4s, v1.4s, #0
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 0, i32 2>
+  %a.imag = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> <i32 1, i32 3>
+  %b.real = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 0, i32 2>
+  %b.imag = shufflevector <4 x float> %b, <4 x float> poison, <2 x i32> <i32 1, i32 3>
+  %0 = fmul fast <2 x float> %b.imag, %a.real
+  %1 = fmul fast <2 x float> %b.real, %a.imag
+  %2 = fadd fast <2 x float> %1, %0
+  %3 = fmul fast <2 x float> %b.real, %a.real
+  %4 = fmul fast <2 x float> %a.imag, %b.imag
+  %5 = fsub fast <2 x float> %3, %4
+  %interleaved.vec = shufflevector <2 x float> %5, <2 x float> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
+  ret <4 x float> %interleaved.vec
+}
+
+define <8 x float> @complex_mul_v8f32(<8 x float> %a, <8 x float> %b) {
+; CHECK-LABEL: complex_mul_v8f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp2 v4.4s, v2.4s, v3.4s
+; CHECK-NEXT:    uzp2 v5.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp1 v2.4s, v2.4s, v3.4s
+; CHECK-NEXT:    fmul v1.4s, v5.4s, v4.4s
+; CHECK-NEXT:    fmul v3.4s, v4.4s, v0.4s
+; CHECK-NEXT:    fneg v1.4s, v1.4s
+; CHECK-NEXT:    fmla v3.4s, v5.4s, v2.4s
+; CHECK-NEXT:    fmla v1.4s, v0.4s, v2.4s
+; CHECK-NEXT:    zip1 v0.4s, v1.4s, v3.4s
+; CHECK-NEXT:    zip2 v1.4s, v1.4s, v3.4s
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %a.imag = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %b.real = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+  %b.imag = shufflevector <8 x float> %b, <8 x float> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+  %0 = fmul fast <4 x float> %b.imag, %a.real
+  %1 = fmul fast <4 x float> %b.real, %a.imag
+  %2 = fadd fast <4 x float> %1, %0
+  %3 = fmul fast <4 x float> %b.real, %a.real
+  %4 = fmul fast <4 x float> %a.imag, %b.imag
+  %5 = fsub fast <4 x float> %3, %4
+  %interleaved.vec = shufflevector <4 x float> %5, <4 x float> %2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+  ret <8 x float> %interleaved.vec
+}
+
+define <16 x float> @complex_mul_v16f32(<16 x float> %a, <16 x float> %b) {
+; CHECK-LABEL: complex_mul_v16f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    uzp2 v16.4s, v4.4s, v5.4s
+; CHECK-NEXT:    uzp1 v17.4s, v2.4s, v3.4s
+; CHECK-NEXT:    uzp1 v18.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp2 v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    uzp2 v1.4s, v2.4s, v3.4s
+; CHECK-NEXT:    uzp2 v2.4s, v6.4s, v7.4s
+; CHECK-NEXT:    uzp1 v3.4s, v4.4s, v5.4s
+; CHECK-NEXT:    fmul v4.4s, v0.4s, v16.4s
+; CHECK-NEXT:    uzp1 v5.4s, v6.4s, v7.4s
+; CHECK-NEXT:    fmul v6.4s, v1.4s, v2.4s
+; CHECK-NEXT:    fmul v7.4s, v16.4s, v18.4s
+; CHECK-NEXT:    fneg v4.4s, v4.4s
+; CHECK-NEXT:    fmul v16.4s, v2.4s, v17.4s
+; CHECK-NEXT:    fneg v6.4s, v6.4s
+; CHECK-NEXT:    fmla v7.4s, v0.4s, v3.4s
+; CHECK-NEXT:    fmla v4.4s, v18.4s, v3.4s
+; CHECK-NEXT:    fmla v16.4s, v1.4s, v5.4s
+; CHECK-NEXT:    fmla v6.4s, v17.4s, v5.4s
+; CHECK-NEXT:    zip1 v0.4s, v4.4s, v7.4s
+; CHECK-NEXT:    zip2 v1.4s, v4.4s, v7.4s
+; CHECK-NEXT:    zip1 v2.4s, v6.4s, v16.4s
+; CHECK-NEXT:    zip2 v3.4s, v6.4s, v16.4s
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %a.imag = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %b.real = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
+  %b.imag = shufflevector <16 x float> %b, <16 x float> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
+  %0 = fmul fast <8 x float> %b.imag, %a.real
+  %1 = fmul fast <8 x float> %b.real, %a.imag
+  %2 = fadd fast <8 x float> %1, %0
+  %3 = fmul fast <8 x float> %b.real, %a.real
+  %4 = fmul fast <8 x float> %a.imag, %b.imag
+  %5 = fsub fast <8 x float> %3, %4
+  %interleaved.vec = shufflevector <8 x float> %5, <8 x float> %2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
+  ret <16 x float> %interleaved.vec
+}
diff --git a/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f64-add.ll b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f64-add.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f64-add.ll
@@ -0,0 +1,74 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
+
+target triple = "aarch64-arm-none-eabi"
+
+
+
+define <2 x double> @complex_add_v2f64(<2 x double> %a, <2 x double> %b) { ; One complex double per operand; computes b + i*a. Expected assembly is a single fcadd with rotation #90.
+; CHECK-LABEL: complex_add_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcadd v0.2d, v1.2d, v0.2d, #90
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <1 x i32> <i32 0> ; lane 0 of %a (second operand is never selected by the mask)
+  %a.imag = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <1 x i32> <i32 1> ; lane 1 of %a
+  %b.real = shufflevector <2 x double> %b, <2 x double> zeroinitializer, <1 x i32> <i32 0> ; lane 0 of %b
+  %b.imag = shufflevector <2 x double> %b, <2 x double> zeroinitializer, <1 x i32> <i32 1> ; lane 1 of %b
+  %0 = fsub fast <1 x double> %b.real, %a.imag ; real part of the result: b.re - a.im
+  %1 = fadd fast <1 x double> %b.imag, %a.real ; imaginary part of the result: b.im + a.re
+  %interleaved.vec = shufflevector <1 x double> %0, <1 x double> %1, <2 x i32> <i32 0, i32 1> ; pack as {real, imag}
+  ret <2 x double> %interleaved.vec
+}
+define <4 x double> @complex_add_v4f64(<4 x double> %a, <4 x double> %b) { ; Two interleaved complex doubles per operand; computes b + i*a. Expected assembly is zip/fsub/fadd/zip with no fcadd. NOTE(review): presumably this width is not accepted by the target's complex-deinterleaving hook - confirm.
+; CHECK-LABEL: complex_add_v4f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    zip1 v4.2d, v2.2d, v3.2d
+; CHECK-NEXT:    zip1 v5.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v2.2d, v3.2d
+; CHECK-NEXT:    fsub v2.2d, v4.2d, v0.2d
+; CHECK-NEXT:    fadd v1.2d, v1.2d, v5.2d
+; CHECK-NEXT:    zip1 v0.2d, v2.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v2.2d, v1.2d
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <2 x i32> <i32 0, i32 2> ; even lanes of %a (second operand is never selected by the mask)
+  %a.imag = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <2 x i32> <i32 1, i32 3> ; odd lanes of %a
+  %b.real = shufflevector <4 x double> %b, <4 x double> zeroinitializer, <2 x i32> <i32 0, i32 2> ; even lanes of %b
+  %b.imag = shufflevector <4 x double> %b, <4 x double> zeroinitializer, <2 x i32> <i32 1, i32 3> ; odd lanes of %b
+  %0 = fsub fast <2 x double> %b.real, %a.imag ; real parts of the result: b.re - a.im
+  %1 = fadd fast <2 x double> %b.imag, %a.real ; imaginary parts of the result: b.im + a.re
+  %interleaved.vec = shufflevector <2 x double> %0, <2 x double> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> ; alternate lanes of %0 and %1: {re0, im0, re1, im1}
+  ret <4 x double> %interleaved.vec
+}
+define <8 x double> @complex_add_v8f64(<8 x double> %a, <8 x double> %b) { ; Four interleaved complex doubles per operand; computes b + i*a. Expected assembly is zip/fsub/fadd/zip with no fcadd. NOTE(review): presumably this width is not accepted by the target's complex-deinterleaving hook - confirm.
+; CHECK-LABEL: complex_add_v8f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    zip1 v16.2d, v4.2d, v5.2d
+; CHECK-NEXT:    zip1 v17.2d, v2.2d, v3.2d
+; CHECK-NEXT:    zip1 v18.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v2.2d, v3.2d
+; CHECK-NEXT:    zip2 v2.2d, v4.2d, v5.2d
+; CHECK-NEXT:    zip1 v3.2d, v6.2d, v7.2d
+; CHECK-NEXT:    zip2 v4.2d, v6.2d, v7.2d
+; CHECK-NEXT:    fsub v5.2d, v16.2d, v0.2d
+; CHECK-NEXT:    fadd v2.2d, v2.2d, v18.2d
+; CHECK-NEXT:    fsub v3.2d, v3.2d, v1.2d
+; CHECK-NEXT:    fadd v4.2d, v4.2d, v17.2d
+; CHECK-NEXT:    zip1 v0.2d, v5.2d, v2.2d
+; CHECK-NEXT:    zip2 v1.2d, v5.2d, v2.2d
+; CHECK-NEXT:    zip1 v2.2d, v3.2d, v4.2d
+; CHECK-NEXT:    zip2 v3.2d, v3.2d, v4.2d
+; CHECK-NEXT:    ret
+entry:
+  %a.real = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even lanes of %a (second operand is never selected by the mask)
+  %a.imag = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd lanes of %a
+  %b.real = shufflevector <8 x double> %b, <8 x double> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even lanes of %b
+  %b.imag = shufflevector <8 x double> %b, <8 x double> zeroinitializer, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd lanes of %b
+  %0 = fsub fast <4 x double> %b.real, %a.imag ; real parts of the result: b.re - a.im
+  %1 = fadd fast <4 x double> %b.imag, %a.real ; imaginary parts of the result: b.im + a.re
+  %interleaved.vec = shufflevector <4 x double> %0, <4 x double> %1, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> ; alternate lanes of %0 and %1: {re0, im0, re1, im1, ...}
+  ret <8 x double> %interleaved.vec
+}
diff --git a/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f64-mul.ll b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f64-mul.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ComplexArithmetic/complex-arithmetic-f64-mul.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
+
+target triple = "aarch64-arm-none-eabi"
+
+define <2 x double> @complex_mul_v2f64(<2 x double> %a, <2 x double> %b) { ; One complex double per operand; computes the complex product a*b. Expected assembly zeroes an accumulator with movi, then issues two fcmla (rotations #90 and #0).
+; CHECK-LABEL: complex_mul_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v2.2d, #0000000000000000
+; CHECK-NEXT:    fcmla v2.2d, v0.2d, v1.2d, #90
+; CHECK-NEXT:    fcmla v2.2d, v0.2d, v1.2d, #0
+; CHECK-NEXT:    mov v0.16b, v2.16b
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <2 x double> %a, <2 x double> poison, <1 x i32> <i32 0> ; lane 0 of %a
+  %a.imag = shufflevector <2 x double> %a, <2 x double> poison, <1 x i32> <i32 1> ; lane 1 of %a
+  %b.real = shufflevector <2 x double> %b, <2 x double> poison, <1 x i32> <i32 0> ; lane 0 of %b
+  %b.imag = shufflevector <2 x double> %b, <2 x double> poison, <1 x i32> <i32 1> ; lane 1 of %b
+  %0 = fmul fast <1 x double> %b.imag, %a.real ; b.im * a.re
+  %1 = fmul fast <1 x double> %b.real, %a.imag ; b.re * a.im
+  %2 = fadd fast <1 x double> %1, %0 ; imaginary part of the product
+  %3 = fmul fast <1 x double> %b.real, %a.real ; b.re * a.re
+  %4 = fmul fast <1 x double> %a.imag, %b.imag ; a.im * b.im
+  %5 = fsub fast <1 x double> %3, %4 ; real part of the product
+  %interleaved.vec = shufflevector <1 x double> %5, <1 x double> %2, <2 x i32> <i32 0, i32 1> ; pack as {real, imag}
+  ret <2 x double> %interleaved.vec
+}
+
+define <4 x double> @complex_mul_v4f64(<4 x double> %a, <4 x double> %b) { ; Two interleaved complex doubles per operand; computes the complex product a*b. Expected assembly is zip/fmul/fneg/fmla/zip with no fcmla. NOTE(review): presumably this width is not accepted by the target's complex-deinterleaving hook - confirm.
+; CHECK-LABEL: complex_mul_v4f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    zip2 v4.2d, v2.2d, v3.2d
+; CHECK-NEXT:    zip2 v5.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip1 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip1 v2.2d, v2.2d, v3.2d
+; CHECK-NEXT:    fmul v1.2d, v5.2d, v4.2d
+; CHECK-NEXT:    fmul v3.2d, v4.2d, v0.2d
+; CHECK-NEXT:    fneg v1.2d, v1.2d
+; CHECK-NEXT:    fmla v3.2d, v5.2d, v2.2d
+; CHECK-NEXT:    fmla v1.2d, v0.2d, v2.2d
+; CHECK-NEXT:    zip1 v0.2d, v1.2d, v3.2d
+; CHECK-NEXT:    zip2 v1.2d, v1.2d, v3.2d
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2> ; even lanes of %a
+  %a.imag = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 1, i32 3> ; odd lanes of %a
+  %b.real = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 0, i32 2> ; even lanes of %b
+  %b.imag = shufflevector <4 x double> %b, <4 x double> poison, <2 x i32> <i32 1, i32 3> ; odd lanes of %b
+  %0 = fmul fast <2 x double> %b.imag, %a.real ; b.im * a.re
+  %1 = fmul fast <2 x double> %b.real, %a.imag ; b.re * a.im
+  %2 = fadd fast <2 x double> %1, %0 ; imaginary parts of the product
+  %3 = fmul fast <2 x double> %b.real, %a.real ; b.re * a.re
+  %4 = fmul fast <2 x double> %a.imag, %b.imag ; a.im * b.im
+  %5 = fsub fast <2 x double> %3, %4 ; real parts of the product
+  %interleaved.vec = shufflevector <2 x double> %5, <2 x double> %2, <4 x i32> <i32 0, i32 2, i32 1, i32 3> ; alternate lanes of %5 and %2: {re0, im0, re1, im1}
+  ret <4 x double> %interleaved.vec
+}
+
+define <8 x double> @complex_mul_v8f64(<8 x double> %a, <8 x double> %b) { ; Four interleaved complex doubles per operand; computes the complex product a*b. Expected assembly is zip/fmul/fneg/fmla/zip with no fcmla. NOTE(review): presumably this width is not accepted by the target's complex-deinterleaving hook - confirm.
+; CHECK-LABEL: complex_mul_v8f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    zip2 v16.2d, v4.2d, v5.2d
+; CHECK-NEXT:    zip1 v17.2d, v2.2d, v3.2d
+; CHECK-NEXT:    zip1 v18.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    zip2 v1.2d, v2.2d, v3.2d
+; CHECK-NEXT:    zip2 v2.2d, v6.2d, v7.2d
+; CHECK-NEXT:    zip1 v3.2d, v4.2d, v5.2d
+; CHECK-NEXT:    fmul v4.2d, v0.2d, v16.2d
+; CHECK-NEXT:    zip1 v5.2d, v6.2d, v7.2d
+; CHECK-NEXT:    fmul v6.2d, v1.2d, v2.2d
+; CHECK-NEXT:    fmul v7.2d, v16.2d, v18.2d
+; CHECK-NEXT:    fneg v4.2d, v4.2d
+; CHECK-NEXT:    fmul v16.2d, v2.2d, v17.2d
+; CHECK-NEXT:    fneg v6.2d, v6.2d
+; CHECK-NEXT:    fmla v7.2d, v0.2d, v3.2d
+; CHECK-NEXT:    fmla v4.2d, v18.2d, v3.2d
+; CHECK-NEXT:    fmla v16.2d, v1.2d, v5.2d
+; CHECK-NEXT:    fmla v6.2d, v17.2d, v5.2d
+; CHECK-NEXT:    zip1 v0.2d, v4.2d, v7.2d
+; CHECK-NEXT:    zip2 v1.2d, v4.2d, v7.2d
+; CHECK-NEXT:    zip1 v2.2d, v6.2d, v16.2d
+; CHECK-NEXT:    zip2 v3.2d, v6.2d, v16.2d
+; CHECK-NEXT:    ret
+entry:
+  %a.real   = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even lanes of %a
+  %a.imag = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd lanes of %a
+  %b.real = shufflevector <8 x double> %b, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6> ; even lanes of %b
+  %b.imag = shufflevector <8 x double> %b, <8 x double> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7> ; odd lanes of %b
+  %0 = fmul fast <4 x double> %b.imag, %a.real ; b.im * a.re
+  %1 = fmul fast <4 x double> %b.real, %a.imag ; b.re * a.im
+  %2 = fadd fast <4 x double> %1, %0 ; imaginary parts of the product
+  %3 = fmul fast <4 x double> %b.real, %a.real ; b.re * a.re
+  %4 = fmul fast <4 x double> %a.imag, %b.imag ; a.im * b.im
+  %5 = fsub fast <4 x double> %3, %4 ; real parts of the product
+  %interleaved.vec = shufflevector <4 x double> %5, <4 x double> %2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7> ; alternate lanes of %5 and %2: {re0, im0, re1, im1, ...}
+  ret <8 x double> %interleaved.vec
+}
diff --git a/llvm/test/CodeGen/AArch64/O3-pipeline.ll b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
--- a/llvm/test/CodeGen/AArch64/O3-pipeline.ll
+++ b/llvm/test/CodeGen/AArch64/O3-pipeline.ll
@@ -74,6 +74,7 @@
 ; CHECK-NEXT:       Basic Alias Analysis (stateless AA impl)
 ; CHECK-NEXT:       Function Alias Analysis Results
 ; CHECK-NEXT:       AArch64 Stack Tagging
+; CHECK-NEXT:       Complex Arithmetic Pass
 ; CHECK-NEXT:       Function Alias Analysis Results
 ; CHECK-NEXT:       Memory SSA
 ; CHECK-NEXT:       Interleaved Load Combine Pass