Add the ARMv8.1-A Advanced SIMD intrinsics vqrdmlah / vqrdmlsh (vector,
_lane, _laneq and scalar forms).

* arm_neon.td: the new intrinsics are expressed as vqadd / vqsub applied to
  the result of vqrdmulh, and are wrapped in ArchGuard blocks that test
  __ARM_FEATURE_QRDMX (the _laneq and scalar forms additionally test
  __aarch64__).
* Targets.cpp: __ARM_FEATURE_QRDMX is defined for ARM when ArchKind is
  AK_ARMV8_1A, and for AArch64 when the "+v8.1a" target feature is present.
* New CodeGen tests check for the vqrdmlah/vqrdmlsh (ARM) and
  sqrdmlah/sqrdmlsh (AArch64) instructions; the preprocessor tests check the
  new macro.

NOTE(review): column alignment inside context lines was normalised; if a hunk
is rejected, apply with whitespace-insensitive matching (e.g. `patch -l`).

Index: include/clang/Basic/arm_neon.td
===================================================================
--- include/clang/Basic/arm_neon.td
+++ include/clang/Basic/arm_neon.td
@@ -373,6 +373,10 @@
                                          (splat $p2, $p3))>;
 def OP_QDMULH_LN : Op<(call "vqdmulh", $p0, (splat $p1, $p2))>;
 def OP_QRDMULH_LN : Op<(call "vqrdmulh", $p0, (splat $p1, $p2))>;
+def OP_QRDMLAH : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, $p2))>;
+def OP_QRDMLSH : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, $p2))>;
+def OP_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
+def OP_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1, (splat $p2, $p3)))>;
 def OP_FMS_LN : Op<(call "vfma_lane", $p0, $p1, (op "-", $p2), $p3)>;
 def OP_FMS_LNQ : Op<(call "vfma_laneq", $p0, $p1, (op "-", $p2), $p3)>;
 def OP_TRN1 : Op<(shuffle $p0, $p1, (interleave (decimate mask0, 2),
@@ -473,6 +477,11 @@
 def OP_SCALAR_QDMULH_LN : ScalarMulOp<"vqdmulh">;
 def OP_SCALAR_QRDMULH_LN : ScalarMulOp<"vqrdmulh">;
 
+def OP_SCALAR_QRDMLAH_LN : Op<(call "vqadd", $p0, (call "vqrdmulh", $p1,
+                              (call "vget_lane", $p2, $p3)))>;
+def OP_SCALAR_QRDMLSH_LN : Op<(call "vqsub", $p0, (call "vqrdmulh", $p1,
+                              (call "vget_lane", $p2, $p3)))>;
+
 def OP_SCALAR_HALF_GET_LN : Op<(bitcast "float16_t",
                                  (call "vget_lane",
                                        (bitcast "int16x4_t", $p0), $p1))>;
@@ -514,6 +523,12 @@
 def VMLSL : SOpInst<"vmlsl", "wwdd", "csiUcUsUi", OP_MLSL>;
 def VQDMULH : SInst<"vqdmulh", "ddd", "siQsQi">;
 def VQRDMULH : SInst<"vqrdmulh", "ddd", "siQsQi">;
+
+let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
+def VQRDMLAH : SOpInst<"vqrdmlah", "dddd", "siQsQi", OP_QRDMLAH>;
+def VQRDMLSH : SOpInst<"vqrdmlsh", "dddd", "siQsQi", OP_QRDMLSH>;
+}
+
 def VQDMLAL : SInst<"vqdmlal", "wwdd", "si">;
 def VQDMLSL : SInst<"vqdmlsl", "wwdd", "si">;
 def VMULL : SInst<"vmull", "wdd", "csiUcUsUiPc">;
@@ -741,6 +756,12 @@
 def VQDMULH_LANE : SOpInst<"vqdmulh_lane", "ddgi", "siQsQi", OP_QDMULH_LN>;
 def VQRDMULH_N : SInst<"vqrdmulh_n", "dda", "siQsQi">;
 def VQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ddgi", "siQsQi", OP_QRDMULH_LN>;
+
+let ArchGuard = "defined(__ARM_FEATURE_QRDMX)" in {
+def VQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "dddgi", "siQsQi", OP_QRDMLAH_LN>;
+def VQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "dddgi", "siQsQi", OP_QRDMLSH_LN>;
+}
+
 def VMLA_N : IOpInst<"vmla_n", "ddda", "siUsUifQsQiQUsQUiQf", OP_MLA_N>;
 def VMLAL_N : SOpInst<"vmlal_n", "wwda", "siUsUi", OP_MLAL_N>;
 def VQDMLAL_N : SInst<"vqdmlal_n", "wwda", "si">;
@@ -1160,6 +1181,11 @@
 def VQDMULH_LANEQ : SOpInst<"vqdmulh_laneq", "ddji", "siQsQi", OP_QDMULH_LN>;
 def VQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ddji", "siQsQi", OP_QRDMULH_LN>;
 
+let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
+def VQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "dddji", "siQsQi", OP_QRDMLAH_LN>;
+def VQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "dddji", "siQsQi", OP_QRDMLSH_LN>;
+}
+
 // Note: d type implemented by SCALAR_VMULX_LANE
 def VMULX_LANE : IOpInst<"vmulx_lane", "ddgi", "fQfQd", OP_MULX_LN>;
 // Note: d type is implemented by SCALAR_VMULX_LANEQ
@@ -1405,6 +1431,16 @@
 // Scalar Integer Saturating Rounding Doubling Multiply Half High
 def SCALAR_SQRDMULH : SInst<"vqrdmulh", "sss", "SsSi">;
 
+let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
+////////////////////////////////////////////////////////////////////////////////
+// Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
+def SCALAR_SQRDMLAH : SOpInst<"vqrdmlah", "ssss", "SsSi", OP_QRDMLAH>;
+
+////////////////////////////////////////////////////////////////////////////////
+// Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
+def SCALAR_SQRDMLSH : SOpInst<"vqrdmlsh", "ssss", "SsSi", OP_QRDMLSH>;
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 // Scalar Floating-point Multiply Extended
 def SCALAR_FMULX : IInst<"vmulx", "sss", "SfSd">;
@@ -1606,6 +1642,16 @@
 def SCALAR_SQRDMULH_LANE : SOpInst<"vqrdmulh_lane", "ssdi", "SsSi", OP_SCALAR_QRDMULH_LN>;
 def SCALAR_SQRDMULH_LANEQ : SOpInst<"vqrdmulh_laneq", "ssji", "SsSi", OP_SCALAR_QRDMULH_LN>;
 
+let ArchGuard = "defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__)" in {
+// Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
+def SCALAR_SQRDMLAH_LANE : SOpInst<"vqrdmlah_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLAH_LN>;
+def SCALAR_SQRDMLAH_LANEQ : SOpInst<"vqrdmlah_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLAH_LN>;
+
+// Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
+def SCALAR_SQRDMLSH_LANE : SOpInst<"vqrdmlsh_lane", "sssdi", "SsSi", OP_SCALAR_QRDMLSH_LN>;
+def SCALAR_SQRDMLSH_LANEQ : SOpInst<"vqrdmlsh_laneq", "sssji", "SsSi", OP_SCALAR_QRDMLSH_LN>;
+}
+
 def SCALAR_VDUP_LANE : IInst<"vdup_lane", "sdi", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
 def SCALAR_VDUP_LANEQ : IInst<"vdup_laneq", "sji", "ScSsSiSlSfSdSUcSUsSUiSUlSPcSPs">;
 }
Index: lib/Basic/Targets.cpp
===================================================================
--- lib/Basic/Targets.cpp
+++ lib/Basic/Targets.cpp
@@ -4869,6 +4869,9 @@
 
     if (Opts.UnsafeFPMath)
       Builder.defineMacro("__ARM_FP_FAST", "1");
+
+    if (ArchKind == llvm::ARM::AK_ARMV8_1A)
+      Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
   }
 
   ArrayRef<Builtin::Info> getTargetBuiltins() const override {
@@ -5250,6 +5253,7 @@
   unsigned CRC;
   unsigned Crypto;
   unsigned Unaligned;
+  unsigned V8_1A;
 
   static const Builtin::Info BuiltinInfo[];
 
@@ -5372,6 +5376,9 @@
     if (Unaligned)
       Builder.defineMacro("__ARM_FEATURE_UNALIGNED", "1");
 
+    if (V8_1A)
+      Builder.defineMacro("__ARM_FEATURE_QRDMX", "1");
+
     // All of the __sync_(bool|val)_compare_and_swap_(1|2|4|8) builtins work.
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_1");
     Builder.defineMacro("__GCC_HAVE_SYNC_COMPARE_AND_SWAP_2");
@@ -5397,6 +5404,7 @@
     CRC = 0;
     Crypto = 0;
     Unaligned = 1;
+    V8_1A = 0;
 
     for (const auto &Feature : Features) {
       if (Feature == "+neon")
@@ -5407,6 +5415,8 @@
         Crypto = 1;
       if (Feature == "+strict-align")
         Unaligned = 0;
+      if (Feature == "+v8.1a")
+        V8_1A = 1;
     }
 
     setDataLayoutString();
Index: test/CodeGen/aarch64-v8.1a-neon-intrinsics.c
===================================================================
--- /dev/null
+++ test/CodeGen/aarch64-v8.1a-neon-intrinsics.c
@@ -0,0 +1,128 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
+// RUN: -target-feature +v8.1a -O3 -S -o - %s \
+// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
+
+#include <arm_neon.h>
+
+// CHECK-AARCH64-LABEL: test_vqrdmlah_laneq_s16
+int16x4_t test_vqrdmlah_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+  return vqrdmlah_laneq_s16(a, b, v, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlah_laneq_s32
+int32x2_t test_vqrdmlah_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+  return vqrdmlah_laneq_s32(a, b, v, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahq_laneq_s16
+int16x8_t test_vqrdmlahq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+  return vqrdmlahq_laneq_s16(a, b, v, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahq_laneq_s32
+int32x4_t test_vqrdmlahq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+  return vqrdmlahq_laneq_s32(a, b, v, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahh_s16
+int16_t test_vqrdmlahh_s16(int16_t a, int16_t b, int16_t c) {
+// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+  return vqrdmlahh_s16(a, b, c);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahs_s32
+int32_t test_vqrdmlahs_s32(int32_t a, int32_t b, int32_t c) {
+// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  return vqrdmlahs_s32(a, b, c);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahh_lane_s16
+int16_t test_vqrdmlahh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
+// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+  return vqrdmlahh_lane_s16(a, b, c, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahs_lane_s32
+int32_t test_vqrdmlahs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
+// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  return vqrdmlahs_lane_s32(a, b, c, 1);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahh_laneq_s16
+int16_t test_vqrdmlahh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
+// CHECK-AARCH64: sqrdmlah {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+  return vqrdmlahh_laneq_s16(a, b, c, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlahs_laneq_s32
+int32_t test_vqrdmlahs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
+// CHECK-AARCH64: sqrdmlah {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  return vqrdmlahs_laneq_s32(a, b, c, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlsh_laneq_s16
+int16x4_t test_vqrdmlsh_laneq_s16(int16x4_t a, int16x4_t b, int16x8_t v) {
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[7]
+  return vqrdmlsh_laneq_s16(a, b, v, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlsh_laneq_s32
+int32x2_t test_vqrdmlsh_laneq_s32(int32x2_t a, int32x2_t b, int32x4_t v) {
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[3]
+  return vqrdmlsh_laneq_s32(a, b, v, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshq_laneq_s16
+int16x8_t test_vqrdmlshq_laneq_s16(int16x8_t a, int16x8_t b, int16x8_t v) {
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[7]
+  return vqrdmlshq_laneq_s16(a, b, v, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshq_laneq_s32
+int32x4_t test_vqrdmlshq_laneq_s32(int32x4_t a, int32x4_t b, int32x4_t v) {
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[3]
+  return vqrdmlshq_laneq_s32(a, b, v, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshh_s16
+int16_t test_vqrdmlshh_s16(int16_t a, int16_t b, int16_t c) {
+// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}
+  return vqrdmlshh_s16(a, b, c);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshs_s32
+int32_t test_vqrdmlshs_s32(int32_t a, int32_t b, int32_t c) {
+// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+  return vqrdmlshs_s32(a, b, c);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshh_lane_s16
+int16_t test_vqrdmlshh_lane_s16(int16_t a, int16_t b, int16x4_t c) {
+// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[3]
+  return vqrdmlshh_lane_s16(a, b, c, 3);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshs_lane_s32
+int32_t test_vqrdmlshs_lane_s32(int32_t a, int32_t b, int32x2_t c) {
+// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
+  return vqrdmlshs_lane_s32(a, b, c, 1);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshh_laneq_s16
+int16_t test_vqrdmlshh_laneq_s16(int16_t a, int16_t b, int16x8_t c) {
+// CHECK-AARCH64: sqrdmlsh {{h[0-9]+|v[0-9]+.4h}}, {{h[0-9]+|v[0-9]+.4h}}, {{v[0-9]+}}.h[7]
+  return vqrdmlshh_laneq_s16(a, b, c, 7);
+}
+
+// CHECK-AARCH64-LABEL: test_vqrdmlshs_laneq_s32
+int32_t test_vqrdmlshs_laneq_s32(int32_t a, int32_t b, int32x4_t c) {
+// CHECK-AARCH64: sqrdmlsh {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
+  return vqrdmlshs_laneq_s32(a, b, c, 3);
+}
+
Index: test/CodeGen/arm-v8.1a-neon-intrinsics.c
===================================================================
--- /dev/null
+++ test/CodeGen/arm-v8.1a-neon-intrinsics.c
@@ -0,0 +1,122 @@
+// REQUIRES: arm-registered-target, aarch64-registered-target
+// RUN: %clang_cc1 -triple armv8.1a-linux-gnu -target-feature +neon \
+// RUN: -O3 -S -o - %s \
+// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ARM
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon \
+// RUN: -target-feature +v8.1a -O3 -S -o - %s \
+// RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-AARCH64
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: test_vqrdmlah_s16
+int16x4_t test_vqrdmlah_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlah.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  return vqrdmlah_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlah_s32
+int32x2_t test_vqrdmlah_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlah.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  return vqrdmlah_s32(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_s16
+int16x8_t test_vqrdmlahq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+// CHECK-ARM: vqrdmlah.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  return vqrdmlahq_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_s32
+int32x4_t test_vqrdmlahq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+// CHECK-ARM: vqrdmlah.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  return vqrdmlahq_s32(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlah_lane_s16
+int16x4_t test_vqrdmlah_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlah.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+  return vqrdmlah_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlah_lane_s32
+int32x2_t test_vqrdmlah_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlah.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vqrdmlah_lane_s32(a, b, c, 1);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_lane_s16
+int16x8_t test_vqrdmlahq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlah.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+  return vqrdmlahq_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlahq_lane_s32
+int32x4_t test_vqrdmlahq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlah.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vqrdmlahq_lane_s32(a, b, c, 1);
+}
+
+// CHECK-LABEL: test_vqrdmlsh_s16
+int16x4_t test_vqrdmlsh_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlsh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
+  return vqrdmlsh_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlsh_s32
+int32x2_t test_vqrdmlsh_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlsh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
+  return vqrdmlsh_s32(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_s16
+int16x8_t test_vqrdmlshq_s16(int16x8_t a, int16x8_t b, int16x8_t c) {
+// CHECK-ARM: vqrdmlsh.s16 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
+  return vqrdmlshq_s16(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_s32
+int32x4_t test_vqrdmlshq_s32(int32x4_t a, int32x4_t b, int32x4_t c) {
+// CHECK-ARM: vqrdmlsh.s32 q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
+  return vqrdmlshq_s32(a, b, c);
+}
+
+// CHECK-LABEL: test_vqrdmlsh_lane_s16
+int16x4_t test_vqrdmlsh_lane_s16(int16x4_t a, int16x4_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlsh.s16 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+  return vqrdmlsh_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlsh_lane_s32
+int32x2_t test_vqrdmlsh_lane_s32(int32x2_t a, int32x2_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlsh.s32 d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+  return vqrdmlsh_lane_s32(a, b, c, 1);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_lane_s16
+int16x8_t test_vqrdmlshq_lane_s16(int16x8_t a, int16x8_t b, int16x4_t c) {
+// CHECK-ARM: vqrdmlsh.s16 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[3]
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+  return vqrdmlshq_lane_s16(a, b, c, 3);
+}
+
+// CHECK-LABEL: test_vqrdmlshq_lane_s32
+int32x4_t test_vqrdmlshq_lane_s32(int32x4_t a, int32x4_t b, int32x2_t c) {
+// CHECK-ARM: vqrdmlsh.s32 q{{[0-9]+}}, q{{[0-9]+}}, d{{[0-9]+}}[1]
+// CHECK-AARCH64: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+  return vqrdmlshq_lane_s32(a, b, c, 1);
+}
+
Index: test/Preprocessor/aarch64-target-features.c
===================================================================
--- test/Preprocessor/aarch64-target-features.c
+++ test/Preprocessor/aarch64-target-features.c
@@ -71,6 +71,9 @@
 // CHECK-NEON: __ARM_NEON 1
 // CHECK-NEON: __ARM_NEON_FP 0xE
 
+// RUN: %clang -target aarch64-none-eabi -march=armv8.1-a -x c -E -dM %s -o - | FileCheck --check-prefix=CHECK-V81A %s
+// CHECK-V81A: __ARM_FEATURE_QRDMX 1
+
 // RUN: %clang -target aarch64 -march=arm64 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-ARCH-NOT-ACCEPT %s
 // RUN: %clang -target aarch64 -march=aarch64 -### -c %s 2>&1 | FileCheck -check-prefix=CHECK-ARCH-NOT-ACCEPT %s
 // CHECK-ARCH-NOT-ACCEPT: error: the clang compiler does not support
Index: test/Preprocessor/arm-target-features.c
===================================================================
--- test/Preprocessor/arm-target-features.c
+++ test/Preprocessor/arm-target-features.c
@@ -407,4 +407,5 @@
 // CHECK-V81A: __ARM_ARCH 8
 // CHECK-V81A: __ARM_ARCH_8_1A__ 1
 // CHECK-V81A: #define __ARM_ARCH_PROFILE 'A'
+// CHECK-V81A: __ARM_FEATURE_QRDMX 1
 // CHECK-V81A: #define __ARM_FP 0xE