diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -708,6 +708,11 @@
 def SCALAR_HALF_GET_LANEQ : IOpInst<"vget_lane", "1.I", "Qh", OP_SCALAR_HALF_GET_LNQ>;
 def SCALAR_HALF_SET_LANEQ : IOpInst<"vset_lane", ".1.I", "Qh", OP_SCALAR_HALF_SET_LNQ>;
 
+////////////////////////////////////////////////////////////////////////////////
+// Non-poly128_t vadd for Arm and AArch64
+// TODO: poly128_t not implemented on arm32
+def VADDP : WInst<"vadd", "...", "PcPsPlQPcQPsQPl">;
+
 ////////////////////////////////////////////////////////////////////////////////
 // AArch64 Intrinsics
 
@@ -1171,7 +1176,9 @@
 def SM4EKEY : SInst<"vsm4ekey", "...", "QUi">;
 }
 
-def VADDP : WInst<"vadd", "...", "PcPsPlQPcQPsQPlQPk">;
+////////////////////////////////////////////////////////////////////////////////
+// poly128_t vadd for AArch64 only; see VADDP above for the rest
+def VADDP_Q : WInst<"vadd", "...", "QPk">;
 
 ////////////////////////////////////////////////////////////////////////////////
 // Float -> Int conversions with explicit rounding mode
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -5460,7 +5460,6 @@
   NEONMAP1(vabsq_v, arm_neon_vabs, 0),
   NEONMAP0(vadd_v),
   NEONMAP0(vaddhn_v),
-  NEONMAP0(vaddq_p128),
   NEONMAP0(vaddq_v),
   NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
   NEONMAP1(vaeseq_v, arm_neon_aese, 0),
diff --git a/clang/test/CodeGen/arm-poly-add.c b/clang/test/CodeGen/arm-poly-add.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/arm-poly-add.c
@@ -0,0 +1,86 @@
+// REQUIRES: arm-registered-target
+// RUN: %clang_cc1 -triple armv8.2a-arm-none-eabi \
+// RUN:   -target-feature +neon \
+// RUN:   -mfloat-abi hard \
+// RUN:   -disable-O0-optnone -emit-llvm -o - %s | opt -S -mem2reg \
+// RUN:   | FileCheck %s
+
+#include <arm_neon.h>
+
+// CHECK-LABEL: @test_vadd_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <8 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <8 x i8> [[TMP0]]
+//
+poly8x8_t test_vadd_p8(poly8x8_t a, poly8x8_t b) {
+  return vadd_p8 (a, b);
+}
+
+// CHECK-LABEL: @test_vadd_p16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A:%.*]] to <8 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[B:%.*]] to <8 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16>
+// CHECK-NEXT:    ret <4 x i16> [[TMP3]]
+//
+poly16x4_t test_vadd_p16(poly16x4_t a, poly16x4_t b) {
+  return vadd_p16 (a, b);
+}
+
+// CHECK-LABEL: @test_vadd_p64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <1 x i64> [[A:%.*]] to <8 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <1 x i64> [[B:%.*]] to <8 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64>
+// CHECK-NEXT:    ret <1 x i64> [[TMP3]]
+//
+poly64x1_t test_vadd_p64(poly64x1_t a, poly64x1_t b) {
+  return vadd_p64(a, b);
+}
+
+// CHECK-LABEL: @test_vaddq_p8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = xor <16 x i8> [[A:%.*]], [[B:%.*]]
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+poly8x16_t test_vaddq_p8(poly8x16_t a, poly8x16_t b){
+  return vaddq_p8(a, b);
+}
+
+// CHECK-LABEL: @test_vaddq_p16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16>
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+poly16x8_t test_vaddq_p16(poly16x8_t a, poly16x8_t b){
+  return vaddq_p16(a, b);
+}
+
+// CHECK-LABEL: @test_vaddq_p64(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i64> [[A:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> [[B:%.*]] to <16 x i8>
+// CHECK-NEXT:    [[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+// CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+//
+poly64x2_t test_vaddq_p64(poly64x2_t a, poly64x2_t b){
+  return vaddq_p64(a, b);
+}
+// TODO: poly128_t not implemented on aarch32
+// CHCK-LABEL: @test_vaddq_p128(
+// CHCK-NEXT:  entry:
+// CHCK-NEXT:    [[TMP0:%.*]] = bitcast i128 [[A:%.*]] to <16 x i8>
+// CHCK-NEXT:    [[TMP1:%.*]] = bitcast i128 [[B:%.*]] to <16 x i8>
+// CHCK-NEXT:    [[TMP2:%.*]] = xor <16 x i8> [[TMP0]], [[TMP1]]
+// CHCK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+// CHCK-NEXT:    ret i128 [[TMP3]]
+//
+//poly128_t test_vaddq_p128 (poly128_t a, poly128_t b){
+//  return vaddq_p128(a, b);
+//}
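
Note: a minimal standalone sketch, not part of the patch, showing why the
CHECK lines above expect xor. Addition of polynomials over GF(2) is
carry-less, so each vadd_p* intrinsic is exactly a lane-wise exclusive-or of
the underlying bits. The file name and variable names are illustrative, and
it assumes a toolchain whose <arm_neon.h> provides vadd_p8 (AArch64 today,
AArch32 once this patch lands).

    /* poly_add_demo.c - hypothetical example, not from the patch. */
    #include <arm_neon.h>
    #include <stdio.h>

    int main(void) {
      poly8x8_t a = vcreate_p8(0x1122334455667788ULL);
      poly8x8_t b = vcreate_p8(0x0f0f0f0f0f0f0f0fULL);

      /* Polynomial add via the intrinsic under test... */
      poly8x8_t sum = vadd_p8(a, b);

      /* ...versus an explicit xor of the same bits as the reference. */
      uint8x8_t ref = veor_u8(vreinterpret_u8_p8(a), vreinterpret_u8_p8(b));

      uint64_t s = vget_lane_u64(vreinterpret_u64_p8(sum), 0);
      uint64_t r = vget_lane_u64(vreinterpret_u64_u8(ref), 0);
      printf("vadd_p8: %016llx  xor: %016llx  -> %s\n",
             (unsigned long long)s, (unsigned long long)r,
             s == r ? "equal" : "DIFFER");
      return 0;
    }

The same reasoning covers the q-register variants (vaddq_p8 and friends): the
IR differs only in the bitcasts around the 128-bit xor, as the test's CHECK
lines show.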