Index: test/CodeGen/aarch64-neon-vget.c =================================================================== --- test/CodeGen/aarch64-neon-vget.c +++ test/CodeGen/aarch64-neon-vget.c @@ -80,18 +80,12 @@ } // CHECK-LABEL: define float @test_vget_lane_f16(<4 x half> %a) #0 { -// CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8 -// CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2 -// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8 -// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>* -// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8 -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1 -// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2 -// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half* -// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2 -// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float +// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1 +// CHECK: [[TMP3:%.*]] = bitcast i16 [[VGET_LANE]] to half +// CHECK: [[CONV:%.*]] = fpext half [[TMP3]] to float // CHECK: ret float [[CONV]] float32_t test_vget_lane_f16(float16x4_t a) { return vget_lane_f16(a, 1); @@ -173,18 +167,12 @@ } // CHECK-LABEL: define float @test_vgetq_lane_f16(<8 x half> %a) #1 { -// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16 -// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2 -// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16 -// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>* -// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16 -// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 -// CHECK: store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2 -// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half* -// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2 -// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float +// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> +// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP2]], i32 3 +// CHECK: [[TMP3:%.*]] = bitcast i16 [[VGETQ_LANE]] to half +// CHECK: [[CONV:%.*]] = fpext half [[TMP3]] to float // CHECK: ret float [[CONV]] float32_t test_vgetq_lane_f16(float16x8_t a) { return vgetq_lane_f16(a, 3); @@ -303,23 +291,14 @@ } // CHECK-LABEL: define <4 x half> @test_vset_lane_f16(half* %a, <4 x half> %b) #0 { -// CHECK: [[__REINT_246:%.*]] = alloca half, align 2 -// CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8 -// CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8 // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 -// CHECK: store half [[TMP0]], half* [[__REINT_246]], align 2 -// CHECK: store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8 -// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16* -// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8 -// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> -// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 3 -// CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8 -// CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>* -// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8 -// CHECK: ret <4 x half> [[TMP8]] +// CHECK: [[TMP1:%.*]] = bitcast half [[TMP0]] to i16 +// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %b to <4 x i16> +// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8> +// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP1]], i32 3 +// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[VSET_LANE]] to <4 x half> +// CHECK: ret <4 x half> [[TMP5]] float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) { return vset_lane_f16(*a, b, 3); } @@ -400,23 +379,14 @@ } // CHECK-LABEL: define <8 x half> @test_vsetq_lane_f16(half* %a, <8 x half> %b) #1 { -// CHECK: [[__REINT_248:%.*]] = alloca half, align 2 -// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16 -// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16 // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 -// CHECK: store half [[TMP0]], half* [[__REINT_248]], align 2 -// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16 -// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16* -// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16 -// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> -// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 7 -// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16 -// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>* -// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16 -// CHECK: ret <8 x half> [[TMP8]] +// CHECK: [[TMP1:%.*]] = bitcast half [[TMP0]] to i16 +// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %b to <8 x i16> +// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8> +// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16> +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP1]], i32 7 +// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[VSET_LANE]] to <8 x half> +// CHECK: ret <8 x half> [[TMP5]] float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) { return vsetq_lane_f16(*a, b, 7); } Index: test/CodeGen/arm_neon_intrinsics.c =================================================================== --- test/CodeGen/arm_neon_intrinsics.c +++ test/CodeGen/arm_neon_intrinsics.c @@ -3387,18 +3387,12 @@ } // CHECK-LABEL: @test_vget_lane_f16( -// CHECK: [[__REINT_242:%.*]] = alloca <4 x half>, align 8 -// CHECK: [[__REINT1_242:%.*]] = alloca i16, align 2 -// CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8 -// CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>* -// CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8 -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1 -// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2 -// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half* -// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2 -// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float +// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <4 x i16> +// CHECK: [[TMP1:%.*]] = bitcast <4 x i16> [[TMP0]] to <8 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP2]], i32 1 +// CHECK: [[TMP4:%.*]] = bitcast i16 [[VGET_LANE]] to half +// CHECK: [[CONV:%.*]] = fpext half [[TMP4]] to float // CHECK: ret float [[CONV]] float32_t test_vget_lane_f16(float16x4_t a) { return vget_lane_f16(a, 1); @@ -3480,18 +3474,12 @@ } // CHECK-LABEL: @test_vgetq_lane_f16( -// CHECK: [[__REINT_244:%.*]] = alloca <8 x half>, align 16 -// CHECK: [[__REINT1_244:%.*]] = alloca i16, align 2 -// CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16 -// CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>* -// CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16 -// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 -// CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2 -// CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half* -// CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2 -// CHECK: [[CONV:%.*]] = fpext half [[TMP5]] to float +// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <8 x i16> +// CHECK: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <16 x i8> +// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP2]], i32 3 +// CHECK: [[TMP4:%.*]] = bitcast i16 [[VGET_LANE]] to half +// CHECK: [[CONV:%.*]] = fpext half [[TMP4]] to float // CHECK: ret float [[CONV]] float32_t test_vgetq_lane_f16(float16x8_t a) { return vgetq_lane_f16(a, 3); @@ -14129,23 +14117,14 @@ } // CHECK-LABEL: @test_vset_lane_f16( -// CHECK: [[__REINT_246:%.*]] = alloca half, align 2 -// CHECK: [[__REINT1_246:%.*]] = alloca <4 x half>, align 8 -// CHECK: [[__REINT2_246:%.*]] = alloca <4 x i16>, align 8 // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 -// CHECK: store half [[TMP0]], half* [[__REINT_246]], align 2 -// CHECK: store <4 x half> %b, <4 x half>* [[__REINT1_246]], align 8 -// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_246]] to i16* -// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>* -// CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8 -// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> -// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 1 -// CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8 -// CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>* -// CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8 -// CHECK: ret <4 x half> [[TMP8]] +// CHECK: [[TMP1:%.*]] = bitcast half [[TMP0]] to i16 +// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %b to <4 x i16> +// CHECK: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to <8 x i8> +// CHECK: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP1]], i32 1 +// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[VSET_LANE]] to <4 x half> +// CHECK: ret <4 x half> [[TMP5]] float16x4_t test_vset_lane_f16(float16_t *a, float16x4_t b) { return vset_lane_f16(*a, b, 1); } @@ -14226,23 +14205,14 @@ } // CHECK-LABEL: @test_vsetq_lane_f16( -// CHECK: [[__REINT_248:%.*]] = alloca half, align 2 -// CHECK: [[__REINT1_248:%.*]] = alloca <8 x half>, align 16 -// CHECK: [[__REINT2_248:%.*]] = alloca <8 x i16>, align 16 // CHECK: [[TMP0:%.*]] = load half, half* %a, align 2 -// CHECK: store half [[TMP0]], half* [[__REINT_248]], align 2 -// CHECK: store <8 x half> %b, <8 x half>* [[__REINT1_248]], align 16 -// CHECK: [[TMP1:%.*]] = bitcast half* [[__REINT_248]] to i16* -// CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>* -// CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16 -// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> -// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 3 -// CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16 -// CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>* -// CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16 -// CHECK: ret <8 x half> [[TMP8]] +// CHECK: [[TMP1:%.*]] = bitcast half [[TMP0]] to i16 +// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %b to <8 x i16> +// CHECK: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <16 x i8> +// CHECK: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16> +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP1]], i32 3 +// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[VSET_LANE]] to <8 x half> +// CHECK: ret <8 x half> [[TMP5]] float16x8_t test_vsetq_lane_f16(float16_t *a, float16x8_t b) { return vsetq_lane_f16(*a, b, 3); }