diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3933,6 +3933,22 @@
   SDLoc dl(Op);
   switch (IntNo) {
   default: return SDValue(); // Don't custom lower most intrinsics.
+  case Intrinsic::complex_multiply: {
+    // Lower a complex multiply to two chained FCMLA nodes. The first starts
+    // from a zero accumulator with rotation 0; the second accumulates onto
+    // that result with rotation 90. Per the Arm definition of FCMLA, rotation
+    // 0 contributes the products involving the real part of the first
+    // multiplicand and rotation 90 those involving its imaginary part, so the
+    // pair yields the full complex product.
+    EVT ResVT = Op.getValueType();
+    SDValue LHS = Op.getOperand(1);
+    SDValue RHS = Op.getOperand(2);
+    SDValue Zero = DAG.getConstantFP(0.0, dl, ResVT);
+    SDValue Rot0 = DAG.getNode(AArch64ISD::FCMLA, dl, ResVT, Zero, LHS, RHS,
+                               DAG.getTargetConstant(0, dl, MVT::i32));
+    return DAG.getNode(AArch64ISD::FCMLA, dl, ResVT, Rot0, LHS, RHS,
+                       DAG.getTargetConstant(90, dl, MVT::i32));
+  }
   case Intrinsic::thread_pointer: {
     EVT PtrVT = getPointerTy(DAG.getDataLayout());
     return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
diff --git a/llvm/test/CodeGen/AArch64/complex-intrinsics.ll b/llvm/test/CodeGen/AArch64/complex-intrinsics.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/complex-intrinsics.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -mattr="+complxnum" | FileCheck %s
+
+declare <2 x float> @llvm.complex.multiply.v2f32(<2 x float>, <2 x float>)
+
+define <2 x float> @test_fcmla_v2f32(<2 x float> %a, <2 x float> %b) {
+; CHECK-LABEL: test_fcmla_v2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi d2, #0000000000000000
+; CHECK-NEXT:    fcmla.2s v2, v0, v1, #0
+; CHECK-NEXT:    fcmla.2s v2, v0, v1, #90
+; CHECK-NEXT:    fmov d0, d2
+; CHECK-NEXT:    ret
+  %res = call <2 x float> @llvm.complex.multiply.v2f32(<2 x float> %a, <2 x float> %b)
+  ret <2 x float> %res
+}
+
+declare <4 x float> @llvm.complex.multiply.v4f32(<4 x float>, <4 x float>)
+
+define <4 x float> @test_fcmla_v4f32(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: test_fcmla_v4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    movi.2d v2, #0000000000000000
+; CHECK-NEXT:    fcmla.4s v2, v0, v1, #0
+; CHECK-NEXT:    fcmla.4s v2, v0, v1, #90
+; CHECK-NEXT:    mov.16b v0, v2
+; CHECK-NEXT:    ret
+  %res = call <4 x float> @llvm.complex.multiply.v4f32(<4 x float> %a, <4 x float> %b)
+  ret <4 x float> %res
+}
+
+; FIXME: Crashes during type legalization.
+; declare <8 x float> @llvm.complex.multiply.v8f32(<8 x float>, <8 x float>)
+
+; define <8 x float> @test_fcmla_v8f32(<8 x float> %a, <8 x float> %b) {
+;   %res = call <8 x float> @llvm.complex.multiply.v8f32(<8 x float> %a, <8 x float> %b)
+;   ret <8 x float> %res
+;}