diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -307,10 +307,12 @@
     (IRIntBase<"maxnum", [Vector]> $a, $b)>;
 def vminnmaq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
    (IRIntBase<"minnum", [Vector]>
-        $a, (IRIntBase<"fabs", [Vector]> $b))>;
+        (IRIntBase<"fabs", [Vector]> $a),
+        (IRIntBase<"fabs", [Vector]> $b))>;
 def vmaxnmaq: Intrinsic<Vector, (args Vector:$a, Vector:$b),
    (IRIntBase<"maxnum", [Vector]>
-        $a, (IRIntBase<"fabs", [Vector]> $b))>;
+        (IRIntBase<"fabs", [Vector]> $a),
+        (IRIntBase<"fabs", [Vector]> $b))>;
 }
 
 def vpselq: Intrinsic
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c
--- a/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vmaxnmaq.c
@@ -6,9 +6,10 @@
 // CHECK-LABEL: @test_vmaxnmaq_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[TMP0]])
-// CHECK-NEXT:    ret <8 x half> [[TMP1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vmaxnmaq_f16(float16x8_t a, float16x8_t b) {
@@ -21,9 +22,10 @@
 // CHECK-LABEL: @test_vmaxnmaq_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[TMP0]])
-// CHECK-NEXT:    ret <4 x float> [[TMP1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vmaxnmaq_f32(float32x4_t a, float32x4_t b) {
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c
--- a/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vminnmaq.c
@@ -6,9 +6,10 @@
 // CHECK-LABEL: @test_vminnmaq_f16(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[A:%.*]], <8 x half> [[TMP0]])
-// CHECK-NEXT:    ret <8 x half> [[TMP1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[A:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> [[TMP0]], <8 x half> [[TMP1]])
+// CHECK-NEXT:    ret <8 x half> [[TMP2]]
 //
 float16x8_t test_vminnmaq_f16(float16x8_t a, float16x8_t b) {
@@ -21,9 +22,10 @@
 // CHECK-LABEL: @test_vminnmaq_f32(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]])
-// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[A:%.*]], <4 x float> [[TMP0]])
-// CHECK-NEXT:    ret <4 x float> [[TMP1]]
+// CHECK-NEXT:    [[TMP0:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[A:%.*]])
+// CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[B:%.*]])
+// CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
 //
 float32x4_t test_vminnmaq_f32(float32x4_t a, float32x4_t b) {
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3655,7 +3655,8 @@
   let Predicates = [HasMVEInt] in {
     // Unpredicated v(max|min)nma
-    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$Qd), (fabs (VTI.Vec MQPR:$Qm)))),
+    def : Pat<(VTI.Vec (unpred_op (fabs (VTI.Vec MQPR:$Qd)),
+                                  (fabs (VTI.Vec MQPR:$Qm)))),
               (VTI.Vec (Inst (VTI.Vec MQPR:$Qd), (VTI.Vec MQPR:$Qm)))>;
 
     // Predicated v(max|min)nma
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmaq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmaq.ll
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmaq.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmaxnmaq.ll
@@ -7,9 +7,10 @@
 ; CHECK-NEXT:    vmaxnma.f16 q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
-  %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
-  %1 = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> %a, <8 x half> %0)
-  ret <8 x half> %1
+  %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+  %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+  %2 = tail call <8 x half> @llvm.maxnum.v8f16(<8 x half> %0, <8 x half> %1)
+  ret <8 x half> %2
 }
 
 declare <8 x half> @llvm.fabs.v8f16(<8 x half>) #1
@@ -22,9 +23,10 @@
 ; CHECK-NEXT:    vmaxnma.f32 q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
-  %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
-  %1 = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> %a, <4 x float> %0)
-  ret <4 x float> %1
+  %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+  %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+  %2 = tail call <4 x float> @llvm.maxnum.v4f32(<4 x float> %0, <4 x float> %1)
+  ret <4 x float> %2
 }
 
 declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmaq.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmaq.ll
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmaq.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vminnmaq.ll
@@ -7,9 +7,10 @@
 ; CHECK-NEXT:    vminnma.f16 q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
-  %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
-  %1 = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> %a, <8 x half> %0)
-  ret <8 x half> %1
+  %0 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %a)
+  %1 = tail call <8 x half> @llvm.fabs.v8f16(<8 x half> %b)
+  %2 = tail call <8 x half> @llvm.minnum.v8f16(<8 x half> %0, <8 x half> %1)
+  ret <8 x half> %2
 }
 
 declare <8 x half> @llvm.fabs.v8f16(<8 x half>) #1
@@ -22,9 +23,10 @@
 ; CHECK-NEXT:    vminnma.f32 q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
-  %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
-  %1 = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> %a, <4 x float> %0)
-  ret <4 x float> %1
+  %0 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %a)
+  %1 = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> %b)
+  %2 = tail call <4 x float> @llvm.minnum.v4f32(<4 x float> %0, <4 x float> %1)
+  ret <4 x float> %2
 }
 
 declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1
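
Note on the semantic change (not part of the patch itself): after this change, vmaxnmaq/vminnmaq apply fabs to both operands before the maxnum/minnum, matching the VMAXNMA/VMINNMA instructions, which take the elementwise maximum/minimum of absolute values. A minimal usage sketch in C; the wrapper function name is illustrative only, the intrinsic names come from the tests above:

  #include <arm_mve.h>

  /* Elementwise max(|a|, |b|); with the corrected lowering the
     unpredicated form selects a single VMAXNMA.F32. */
  float32x4_t absolute_max(float32x4_t a, float32x4_t b) {
    /* each lane computes the equivalent of fmaxf(fabsf(a[i]), fabsf(b[i])) */
    return vmaxnmaq_f32(a, b);
  }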