Index: clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c =================================================================== --- clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c +++ clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c @@ -1486,18 +1486,18 @@ // CHECK-LABEL: define {{[^@]+}}@test_vfmaq_n_f16 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[C]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[C]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[C]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[C]], i32 3 -// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[C]], i32 4 -// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[C]], i32 5 -// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[C]], i32 6 -// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[C]], i32 7 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[C]], i32 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[C]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[C]], i32 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[C]], i32 3 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[C]], i32 4 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[C]], i32 5 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[C]], i32 6 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[C]], i32 7 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <16 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[VECINIT7]], <8 x half> [[A]]) +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7_I]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[B]], <8 x half> [[VECINIT7_I]], <8 x half> [[A]]) // CHECK-NEXT: ret <8 x half> [[TMP3]] // float16x8_t test_vfmaq_n_f16(float16x8_t a, float16x8_t b, float16_t c) { @@ -1601,15 +1601,15 @@ // CHECK-LABEL: define {{[^@]+}}@test_vfms_n_f16 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[FNEG:%.*]] = fneg <4 x half> [[B]] -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[C]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[C]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[C]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[C]], i32 3 +// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <4 x half> [[B]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x half> undef, half [[C]], i32 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x half> [[VECINIT_I]], half [[C]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x half> [[VECINIT1_I]], half [[C]], i32 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x half> [[VECINIT2_I]], half [[C]], i32 3 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[FNEG]], <4 x half> [[VECINIT3]], <4 x half> [[A]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[FNEG_I]] to <8 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[VECINIT3_I]] to <8 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.fma.v4f16(<4 x half> [[FNEG_I]], <4 x half> [[VECINIT3_I]], <4 x half> [[A]]) // CHECK-NEXT: ret <4 x half> [[TMP3]] // float16x4_t test_vfms_n_f16(float16x4_t a, float16x4_t b, float16_t c) { @@ -1619,19 +1619,19 @@ // CHECK-LABEL: define {{[^@]+}}@test_vfmsq_n_f16 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], half noundef [[C:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[FNEG:%.*]] = fneg <8 x half> [[B]] -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[C]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[C]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[C]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[C]], i32 3 -// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[C]], i32 4 -// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[C]], i32 5 -// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[C]], i32 6 -// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[C]], i32 7 +// CHECK-NEXT: [[FNEG_I:%.*]] = fneg <8 x half> [[B]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[C]], i32 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[C]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[C]], i32 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[C]], i32 3 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[C]], i32 4 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[C]], i32 5 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[C]], i32 6 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[C]], i32 7 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7]] to <16 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[FNEG]], <8 x half> [[VECINIT7]], <8 x half> [[A]]) +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[FNEG_I]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[VECINIT7_I]] to <16 x i8> +// CHECK-NEXT: [[TMP3:%.*]] = call <8 x half> @llvm.fma.v8f16(<8 x half> [[FNEG_I]], <8 x half> [[VECINIT7_I]], <8 x half> [[A]]) // CHECK-NEXT: ret <8 x half> [[TMP3]] // float16x8_t test_vfmsq_n_f16(float16x8_t a, float16x8_t b, float16_t c) { @@ -1721,12 +1721,12 @@ // CHECK-LABEL: define {{[^@]+}}@test_vmul_n_f16 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[B]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[B]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[B]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[B]], i32 3 -// CHECK-NEXT: [[MUL:%.*]] = fmul <4 x half> [[A]], [[VECINIT3]] -// CHECK-NEXT: ret <4 x half> [[MUL]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x half> undef, half [[B]], i32 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x half> [[VECINIT_I]], half [[B]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x half> [[VECINIT1_I]], half [[B]], i32 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x half> [[VECINIT2_I]], half [[B]], i32 3 +// CHECK-NEXT: [[MUL_I:%.*]] = fmul <4 x half> [[A]], [[VECINIT3_I]] +// CHECK-NEXT: ret <4 x half> [[MUL_I]] // float16x4_t test_vmul_n_f16(float16x4_t a, float16_t b) { return vmul_n_f16(a, b); @@ -1735,16 +1735,16 @@ // CHECK-LABEL: define {{[^@]+}}@test_vmulq_n_f16 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[B]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[B]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[B]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[B]], i32 3 -// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[B]], i32 4 -// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[B]], i32 5 -// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[B]], i32 6 -// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[B]], i32 7 -// CHECK-NEXT: [[MUL:%.*]] = fmul <8 x half> [[A]], [[VECINIT7]] -// CHECK-NEXT: ret <8 x half> [[MUL]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[B]], i32 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[B]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[B]], i32 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[B]], i32 3 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[B]], i32 4 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[B]], i32 5 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[B]], i32 6 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[B]], i32 7 +// CHECK-NEXT: [[MUL_I:%.*]] = fmul <8 x half> [[A]], [[VECINIT7_I]] +// CHECK-NEXT: ret <8 x half> [[MUL_I]] // float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) { return vmulq_n_f16(a, b); @@ -1858,14 +1858,14 @@ // CHECK-LABEL: define {{[^@]+}}@test_vmulx_n_f16 // CHECK-SAME: (<4 x half> noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[B]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[B]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[B]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[B]], i32 3 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x half> undef, half [[B]], i32 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x half> [[VECINIT_I]], half [[B]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x half> [[VECINIT1_I]], half [[B]], i32 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x half> [[VECINIT2_I]], half [[B]], i32 3 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[VECINIT3]] to <8 x i8> -// CHECK-NEXT: [[VMULX2_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[VECINIT3]]) -// CHECK-NEXT: ret <4 x half> [[VMULX2_I]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[VECINIT3_I]] to <8 x i8> +// CHECK-NEXT: [[VMULX2_I_I:%.*]] = call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> [[A]], <4 x half> [[VECINIT3_I]]) +// CHECK-NEXT: ret <4 x half> [[VMULX2_I_I]] // float16x4_t test_vmulx_n_f16(float16x4_t a, float16_t b) { return vmulx_n_f16(a, b); @@ -1874,18 +1874,18 @@ // CHECK-LABEL: define {{[^@]+}}@test_vmulxq_n_f16 // CHECK-SAME: (<8 x half> noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[B]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[B]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[B]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[B]], i32 3 -// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[B]], i32 4 -// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[B]], i32 5 -// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[B]], i32 6 -// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[B]], i32 7 +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[B]], i32 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[B]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[B]], i32 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[B]], i32 3 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[B]], i32 4 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[B]], i32 5 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[B]], i32 6 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[B]], i32 7 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[VECINIT7]] to <16 x i8> -// CHECK-NEXT: [[VMULX2_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[VECINIT7]]) -// CHECK-NEXT: ret <8 x half> [[VMULX2_I]] +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[VECINIT7_I]] to <16 x i8> +// CHECK-NEXT: [[VMULX2_I_I:%.*]] = call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> [[A]], <8 x half> [[VECINIT7_I]]) +// CHECK-NEXT: ret <8 x half> [[VMULX2_I_I]] // float16x8_t test_vmulxq_n_f16(float16x8_t a, float16_t b) { return vmulxq_n_f16(a, b); @@ -1917,9 +1917,8 @@ // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[VMAXV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK-NEXT: [[VMAXV1:%.*]] = call half @llvm.aarch64.neon.fmaxv.f16.v4f16(<4 x half> [[VMAXV]]) -// CHECK-NEXT: ret half [[VMAXV1]] +// CHECK-NEXT: [[VMAXV1_I:%.*]] = call half @llvm.aarch64.neon.fmaxv.f16.v4f16(<4 x half> [[A]]) +// CHECK-NEXT: ret half [[VMAXV1_I]] // float16_t test_vmaxv_f16(float16x4_t a) { return vmaxv_f16(a); @@ -1929,9 +1928,8 @@ // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[VMAXV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK-NEXT: [[VMAXV1:%.*]] = call half @llvm.aarch64.neon.fmaxv.f16.v8f16(<8 x half> [[VMAXV]]) -// CHECK-NEXT: ret half [[VMAXV1]] +// CHECK-NEXT: [[VMAXV1_I:%.*]] = call half @llvm.aarch64.neon.fmaxv.f16.v8f16(<8 x half> [[A]]) +// CHECK-NEXT: ret half [[VMAXV1_I]] // float16_t test_vmaxvq_f16(float16x8_t a) { return vmaxvq_f16(a); @@ -1941,9 +1939,8 @@ // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[VMINV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK-NEXT: [[VMINV1:%.*]] = call half @llvm.aarch64.neon.fminv.f16.v4f16(<4 x half> [[VMINV]]) -// CHECK-NEXT: ret half [[VMINV1]] +// CHECK-NEXT: [[VMINV1_I:%.*]] = call half @llvm.aarch64.neon.fminv.f16.v4f16(<4 x half> [[A]]) +// CHECK-NEXT: ret half [[VMINV1_I]] // float16_t test_vminv_f16(float16x4_t a) { return vminv_f16(a); @@ -1953,9 +1950,8 @@ // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[VMINV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK-NEXT: [[VMINV1:%.*]] = call half @llvm.aarch64.neon.fminv.f16.v8f16(<8 x half> [[VMINV]]) -// CHECK-NEXT: ret half [[VMINV1]] +// CHECK-NEXT: [[VMINV1_I:%.*]] = call half @llvm.aarch64.neon.fminv.f16.v8f16(<8 x half> [[A]]) +// CHECK-NEXT: ret half [[VMINV1_I]] // float16_t test_vminvq_f16(float16x8_t a) { return vminvq_f16(a); @@ -1965,9 +1961,8 @@ // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[VMAXNMV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK-NEXT: [[VMAXNMV1:%.*]] = call half @llvm.aarch64.neon.fmaxnmv.f16.v4f16(<4 x half> [[VMAXNMV]]) -// CHECK-NEXT: ret half [[VMAXNMV1]] +// CHECK-NEXT: [[VMAXNMV1_I:%.*]] = call half @llvm.aarch64.neon.fmaxnmv.f16.v4f16(<4 x half> [[A]]) +// CHECK-NEXT: ret half [[VMAXNMV1_I]] // float16_t test_vmaxnmv_f16(float16x4_t a) { return vmaxnmv_f16(a); @@ -1977,9 +1972,8 @@ // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[VMAXNMV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK-NEXT: [[VMAXNMV1:%.*]] = call half @llvm.aarch64.neon.fmaxnmv.f16.v8f16(<8 x half> [[VMAXNMV]]) -// CHECK-NEXT: ret half [[VMAXNMV1]] +// CHECK-NEXT: [[VMAXNMV1_I:%.*]] = call half @llvm.aarch64.neon.fmaxnmv.f16.v8f16(<8 x half> [[A]]) +// CHECK-NEXT: ret half [[VMAXNMV1_I]] // float16_t test_vmaxnmvq_f16(float16x8_t a) { return vmaxnmvq_f16(a); @@ -1989,9 +1983,8 @@ // CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[VMINNMV:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK-NEXT: [[VMINNMV1:%.*]] = call half @llvm.aarch64.neon.fminnmv.f16.v4f16(<4 x half> [[VMINNMV]]) -// CHECK-NEXT: ret half [[VMINNMV1]] +// CHECK-NEXT: [[VMINNMV1_I:%.*]] = call half @llvm.aarch64.neon.fminnmv.f16.v4f16(<4 x half> [[A]]) +// CHECK-NEXT: ret half [[VMINNMV1_I]] // float16_t test_vminnmv_f16(float16x4_t a) { return vminnmv_f16(a); @@ -2001,9 +1994,8 @@ // CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[VMINNMV:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK-NEXT: [[VMINNMV1:%.*]] = call half @llvm.aarch64.neon.fminnmv.f16.v8f16(<8 x half> [[VMINNMV]]) -// CHECK-NEXT: ret half [[VMINNMV1]] +// CHECK-NEXT: [[VMINNMV1_I:%.*]] = call half @llvm.aarch64.neon.fminnmv.f16.v8f16(<8 x half> [[A]]) +// CHECK-NEXT: ret half [[VMINNMV1_I]] // float16_t test_vminnmvq_f16(float16x8_t a) { return vminnmvq_f16(a); @@ -2200,11 +2192,11 @@ // CHECK-LABEL: define {{[^@]+}}@test_vmov_n_f16 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: ret <4 x half> [[VECINIT3]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x half> undef, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x half> [[VECINIT_I]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x half> [[VECINIT1_I]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x half> [[VECINIT2_I]], half [[A]], i32 3 +// CHECK-NEXT: ret <4 x half> [[VECINIT3_I]] // float16x4_t test_vmov_n_f16(float16_t a) { return vmov_n_f16(a); @@ -2213,15 +2205,15 @@ // CHECK-LABEL: define {{[^@]+}}@test_vmovq_n_f16 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 -// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 -// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 -// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 -// CHECK-NEXT: ret <8 x half> [[VECINIT7]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[A]], i32 3 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[A]], i32 4 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[A]], i32 5 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[A]], i32 6 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[A]], i32 7 +// CHECK-NEXT: ret <8 x half> [[VECINIT7_I]] // float16x8_t test_vmovq_n_f16(float16_t a) { return vmovq_n_f16(a); @@ -2230,11 +2222,11 @@ // CHECK-LABEL: define {{[^@]+}}@test_vdup_n_f16 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> undef, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: ret <4 x half> [[VECINIT3]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x half> undef, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <4 x half> [[VECINIT_I]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <4 x half> [[VECINIT1_I]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <4 x half> [[VECINIT2_I]], half [[A]], i32 3 +// CHECK-NEXT: ret <4 x half> [[VECINIT3_I]] // float16x4_t test_vdup_n_f16(float16_t a) { return vdup_n_f16(a); @@ -2243,15 +2235,15 @@ // CHECK-LABEL: define {{[^@]+}}@test_vdupq_n_f16 // CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR1]] { // CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> undef, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 -// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 -// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 -// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 -// CHECK-NEXT: ret <8 x half> [[VECINIT7]] +// CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x half> undef, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1_I:%.*]] = insertelement <8 x half> [[VECINIT_I]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2_I:%.*]] = insertelement <8 x half> [[VECINIT1_I]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3_I:%.*]] = insertelement <8 x half> [[VECINIT2_I]], half [[A]], i32 3 +// CHECK-NEXT: [[VECINIT4_I:%.*]] = insertelement <8 x half> [[VECINIT3_I]], half [[A]], i32 4 +// CHECK-NEXT: [[VECINIT5_I:%.*]] = insertelement <8 x half> [[VECINIT4_I]], half [[A]], i32 5 +// CHECK-NEXT: [[VECINIT6_I:%.*]] = insertelement <8 x half> [[VECINIT5_I]], half [[A]], i32 6 +// CHECK-NEXT: [[VECINIT7_I:%.*]] = insertelement <8 x half> [[VECINIT6_I]], half [[A]], i32 7 +// CHECK-NEXT: ret <8 x half> [[VECINIT7_I]] // float16x8_t test_vdupq_n_f16(float16_t a) { return vdupq_n_f16(a); Index: clang/utils/TableGen/NeonEmitter.cpp =================================================================== --- clang/utils/TableGen/NeonEmitter.cpp +++ clang/utils/TableGen/NeonEmitter.cpp @@ -395,11 +395,7 @@ // Pointer arguments need to use macros to avoid hiding aligned attributes // from the pointer type. - - // It is not permitted to pass or return an __fp16 by value, so intrinsics - // taking a scalar float16_t must be implemented as macros. - if (Type.isImmediate() || Type.isPointer() || - (Type.isScalar() && Type.isHalf())) + if (Type.isImmediate() || Type.isPointer()) UseMacro = true; } }