diff --git a/clang/test/CodeGen/aarch64-neon-2velem.c b/clang/test/CodeGen/aarch64-neon-2velem.c --- a/clang/test/CodeGen/aarch64-neon-2velem.c +++ b/clang/test/CodeGen/aarch64-neon-2velem.c @@ -451,9 +451,7 @@ } // CHECK-LABEL: @test_vfmas_laneq_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 +// CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> %v, i32 3 // CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a) // CHECK: ret float [[TMP2]] float32_t test_vfmas_laneq_f32(float32_t a, float32_t b, float32x4_t v) { @@ -462,9 +460,7 @@ // CHECK-LABEL: @test_vfmsd_lane_f64( // CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b -// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %v to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 +// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> %v, i32 0 // CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a) // CHECK: ret double [[TMP2]] float64_t test_vfmsd_lane_f64(float64_t a, float64_t b, float64x1_t v) { @@ -473,9 +469,7 @@ // CHECK-LABEL: @test_vfmss_laneq_f32( // CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %v to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 +// CHECK: [[EXTRACT:%.*]] = extractelement <4 x float> %v, i32 3 // CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a) // CHECK: ret float [[TMP2]] float32_t test_vfmss_laneq_f32(float32_t a, float32_t b, float32x4_t v) { @@ -484,9 +478,7 @@ // CHECK-LABEL: @test_vfmsd_laneq_f64( // CHECK: [[SUB:%.*]] = fsub double -0.000000e+00, %b -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %v to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> %v, i32 1 // CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double [[SUB]], double [[EXTRACT]], double %a) // CHECK: ret double [[TMP2]] float64_t test_vfmsd_laneq_f64(float64_t a, float64_t b, float64x2_t v) { diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -8703,7 +8703,6 @@ } // CHECK-LABEL: @test_vpaddd_s64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VPADDD_S64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) // CHECK: ret i64 [[VPADDD_S64_I]] int64_t test_vpaddd_s64(int64x2_t a) { @@ -8711,7 +8710,6 @@ } // CHECK-LABEL: @test_vpadds_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[LANE0_I:%.*]] = extractelement <2 x float> %a, i64 0 // CHECK: [[LANE1_I:%.*]] = extractelement <2 x float> %a, i64 1 // CHECK: [[VPADDD_I:%.*]] = fadd float [[LANE0_I]], [[LANE1_I]] @@ -8721,7 +8719,6 @@ } // CHECK-LABEL: @test_vpaddd_f64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[LANE0_I:%.*]] = extractelement <2 x double> %a, i64 0 // CHECK: [[LANE1_I:%.*]] = extractelement <2 x double> %a, i64 1 // CHECK: [[VPADDD_I:%.*]] = fadd double [[LANE0_I]], [[LANE1_I]] @@ -8731,7 +8728,6 @@ } // CHECK-LABEL: @test_vpmaxnms_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VPMAXNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VPMAXNMS_F32_I]] float32_t test_vpmaxnms_f32(float32x2_t a) { @@ -8739,7 +8735,6 @@ } // CHECK-LABEL: @test_vpmaxnmqd_f64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VPMAXNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VPMAXNMQD_F64_I]] float64_t test_vpmaxnmqd_f64(float64x2_t a) { @@ -8747,7 +8742,6 @@ } // CHECK-LABEL: @test_vpmaxs_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VPMAXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VPMAXS_F32_I]] float32_t test_vpmaxs_f32(float32x2_t a) { @@ -8755,7 +8749,6 @@ } // CHECK-LABEL: @test_vpmaxqd_f64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VPMAXQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VPMAXQD_F64_I]] float64_t test_vpmaxqd_f64(float64x2_t a) { @@ -8763,7 +8756,6 @@ } // CHECK-LABEL: @test_vpminnms_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VPMINNMS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VPMINNMS_F32_I]] float32_t test_vpminnms_f32(float32x2_t a) { @@ -8771,7 +8763,6 @@ } // CHECK-LABEL: @test_vpminnmqd_f64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VPMINNMQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VPMINNMQD_F64_I]] float64_t test_vpminnmqd_f64(float64x2_t a) { @@ -8779,7 +8770,6 @@ } // CHECK-LABEL: @test_vpmins_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VPMINS_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VPMINS_F32_I]] float32_t test_vpmins_f32(float32x2_t a) { @@ -8787,7 +8777,6 @@ } // CHECK-LABEL: @test_vpminqd_f64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VPMINQD_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VPMINQD_F64_I]] float64_t test_vpminqd_f64(float64x2_t a) { @@ -17690,7 +17679,6 @@ } // CHECK-LABEL: @test_vaddv_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VADDV_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VADDV_F32_I]] float32_t test_vaddv_f32(float32x2_t a) { @@ -17698,7 +17686,6 @@ } // CHECK-LABEL: @test_vaddvq_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> // CHECK: [[VADDVQ_F32_I:%.*]] = call float @llvm.aarch64.neon.faddv.f32.v4f32(<4 x float> %a) // CHECK: ret float [[VADDVQ_F32_I]] float32_t test_vaddvq_f32(float32x4_t a) { @@ -17706,7 +17693,6 @@ } // CHECK-LABEL: @test_vaddvq_f64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VADDVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.faddv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VADDVQ_F64_I]] float64_t test_vaddvq_f64(float64x2_t a) { @@ -17714,7 +17700,6 @@ } // CHECK-LABEL: @test_vmaxv_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VMAXV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VMAXV_F32_I]] float32_t test_vmaxv_f32(float32x2_t a) { @@ -17722,7 +17707,6 @@ } // CHECK-LABEL: @test_vmaxvq_f64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VMAXVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VMAXVQ_F64_I]] float64_t test_vmaxvq_f64(float64x2_t a) { @@ -17730,7 +17714,6 @@ } // CHECK-LABEL: @test_vminv_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VMINV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VMINV_F32_I]] float32_t test_vminv_f32(float32x2_t a) { @@ -17738,7 +17721,6 @@ } // CHECK-LABEL: @test_vminvq_f64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VMINVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VMINVQ_F64_I]] float64_t test_vminvq_f64(float64x2_t a) { @@ -17746,7 +17728,6 @@ } // CHECK-LABEL: @test_vmaxnmvq_f64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VMAXNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fmaxnmv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VMAXNMVQ_F64_I]] float64_t test_vmaxnmvq_f64(float64x2_t a) { @@ -17754,7 +17735,6 @@ } // CHECK-LABEL: @test_vmaxnmv_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VMAXNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fmaxnmv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VMAXNMV_F32_I]] float32_t test_vmaxnmv_f32(float32x2_t a) { @@ -17762,7 +17742,6 @@ } // CHECK-LABEL: @test_vminnmvq_f64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> // CHECK: [[VMINNMVQ_F64_I:%.*]] = call double @llvm.aarch64.neon.fminnmv.f64.v2f64(<2 x double> %a) // CHECK: ret double [[VMINNMVQ_F64_I]] float64_t test_vminnmvq_f64(float64x2_t a) { @@ -17770,7 +17749,6 @@ } // CHECK-LABEL: @test_vminnmv_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> // CHECK: [[VMINNMV_F32_I:%.*]] = call float @llvm.aarch64.neon.fminnmv.f32.v2f32(<2 x float> %a) // CHECK: ret float [[VMINNMV_F32_I]] float32_t test_vminnmv_f32(float32x2_t a) { @@ -17798,7 +17776,6 @@ } // CHECK-LABEL: @test_vpaddd_u64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VPADDD_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) // CHECK: ret i64 [[VPADDD_U64_I]] uint64_t test_vpaddd_u64(uint64x2_t a) { @@ -17806,7 +17783,6 @@ } // CHECK-LABEL: @test_vaddvq_s64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VADDVQ_S64_I:%.*]] = call i64 @llvm.aarch64.neon.saddv.i64.v2i64(<2 x i64> %a) // CHECK: ret i64 [[VADDVQ_S64_I]] int64_t test_vaddvq_s64(int64x2_t a) { @@ -17814,7 +17790,6 @@ } // CHECK-LABEL: @test_vaddvq_u64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> // CHECK: [[VADDVQ_U64_I:%.*]] = call i64 @llvm.aarch64.neon.uaddv.i64.v2i64(<2 x i64> %a) // CHECK: ret i64 [[VADDVQ_U64_I]] uint64_t test_vaddvq_u64(uint64x2_t a) { @@ -18178,7 +18153,6 @@ } // CHECK-LABEL: @test_vminv_s32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VMINV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sminv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VMINV_S32_I]] int32_t test_vminv_s32(int32x2_t a) { @@ -18186,7 +18160,6 @@ } // CHECK-LABEL: @test_vminv_u32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VMINV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uminv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VMINV_U32_I]] uint32_t test_vminv_u32(uint32x2_t a) { @@ -18194,7 +18167,6 @@ } // CHECK-LABEL: @test_vmaxv_s32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VMAXV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.smaxv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VMAXV_S32_I]] int32_t test_vmaxv_s32(int32x2_t a) { @@ -18202,7 +18174,6 @@ } // CHECK-LABEL: @test_vmaxv_u32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VMAXV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.umaxv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VMAXV_U32_I]] uint32_t test_vmaxv_u32(uint32x2_t a) { @@ -18210,7 +18181,6 @@ } // CHECK-LABEL: @test_vaddv_s32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VADDV_S32_I:%.*]] = call i32 @llvm.aarch64.neon.saddv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VADDV_S32_I]] int32_t test_vaddv_s32(int32x2_t a) { @@ -18218,7 +18188,6 @@ } // CHECK-LABEL: @test_vaddv_u32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VADDV_U32_I:%.*]] = call i32 @llvm.aarch64.neon.uaddv.i32.v2i32(<2 x i32> %a) // CHECK: ret i32 [[VADDV_U32_I]] uint32_t test_vaddv_u32(uint32x2_t a) { @@ -18226,7 +18195,6 @@ } // CHECK-LABEL: @test_vaddlv_s32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VADDLV_S32_I:%.*]] = call i64 @llvm.aarch64.neon.saddlv.i64.v2i32(<2 x i32> %a) // CHECK: ret i64 [[VADDLV_S32_I]] int64_t test_vaddlv_s32(int32x2_t a) { @@ -18234,7 +18202,6 @@ } // CHECK-LABEL: @test_vaddlv_u32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> // CHECK: [[VADDLV_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v2i32(<2 x i32> %a) // CHECK: ret i64 [[VADDLV_U32_I]] uint64_t test_vaddlv_u32(uint32x2_t a) { diff --git a/clang/test/CodeGen/aarch64-neon-scalar-copy.c b/clang/test/CodeGen/aarch64-neon-scalar-copy.c --- a/clang/test/CodeGen/aarch64-neon-scalar-copy.c +++ b/clang/test/CodeGen/aarch64-neon-scalar-copy.c @@ -4,9 +4,7 @@ #include // CHECK-LABEL: define float @test_vdups_lane_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VDUPS_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +// CHECK: [[VDUPS_LANE:%.*]] = extractelement <2 x float> %a, i32 1 // CHECK: ret float [[VDUPS_LANE]] float32_t test_vdups_lane_f32(float32x2_t a) { return vdups_lane_f32(a, 1); @@ -14,9 +12,7 @@ // CHECK-LABEL: define double @test_vdupd_lane_f64(<1 x double> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[VDUPD_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 +// CHECK: [[VDUPD_LANE:%.*]] = extractelement <1 x double> %a, i32 0 // CHECK: ret double [[VDUPD_LANE]] float64_t test_vdupd_lane_f64(float64x1_t a) { return vdupd_lane_f64(a, 0); @@ -24,9 +20,7 @@ // CHECK-LABEL: define float @test_vdups_laneq_f32(<4 x float> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a, i32 3 // CHECK: ret float [[VGETQ_LANE]] float32_t test_vdups_laneq_f32(float32x4_t a) { return vdups_laneq_f32(a, 3); @@ -34,9 +28,7 @@ // CHECK-LABEL: define double @test_vdupd_laneq_f64(<2 x double> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %a, i32 1 // CHECK: ret double [[VGETQ_LANE]] float64_t test_vdupd_laneq_f64(float64x2_t a) { return vdupd_laneq_f64(a, 1); @@ -52,9 +44,7 @@ // CHECK-LABEL: define i16 @test_vduph_lane_s16(<4 x i16> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3 // CHECK: ret i16 [[VGET_LANE]] int16_t test_vduph_lane_s16(int16x4_t a) { return vduph_lane_s16(a, 3); @@ -62,9 +52,7 @@ // CHECK-LABEL: define i32 @test_vdups_lane_s32(<2 x i32> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1 // CHECK: ret i32 [[VGET_LANE]] int32_t test_vdups_lane_s32(int32x2_t a) { return vdups_lane_s32(a, 1); @@ -72,9 +60,7 @@ // CHECK-LABEL: define i64 @test_vdupd_lane_s64(<1 x i64> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0 // CHECK: ret i64 [[VGET_LANE]] int64_t test_vdupd_lane_s64(int64x1_t a) { return vdupd_lane_s64(a, 0); @@ -90,9 +76,7 @@ // CHECK-LABEL: define i16 @test_vduph_lane_u16(<4 x i16> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3 // CHECK: ret i16 [[VGET_LANE]] uint16_t test_vduph_lane_u16(uint16x4_t a) { return vduph_lane_u16(a, 3); @@ -100,9 +84,7 @@ // CHECK-LABEL: define i32 @test_vdups_lane_u32(<2 x i32> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1 // CHECK: ret i32 [[VGET_LANE]] uint32_t test_vdups_lane_u32(uint32x2_t a) { return vdups_lane_u32(a, 1); @@ -110,9 +92,7 @@ // CHECK-LABEL: define i64 @test_vdupd_lane_u64(<1 x i64> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0 // CHECK: ret i64 [[VGET_LANE]] uint64_t test_vdupd_lane_u64(uint64x1_t a) { return vdupd_lane_u64(a, 0); @@ -127,9 +107,7 @@ // CHECK-LABEL: define i16 @test_vduph_laneq_s16(<8 x i16> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7 // CHECK: ret i16 [[VGETQ_LANE]] int16_t test_vduph_laneq_s16(int16x8_t a) { return vduph_laneq_s16(a, 7); @@ -137,9 +115,7 @@ // CHECK-LABEL: define i32 @test_vdups_laneq_s32(<4 x i32> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3 // CHECK: ret i32 [[VGETQ_LANE]] int32_t test_vdups_laneq_s32(int32x4_t a) { return vdups_laneq_s32(a, 3); @@ -147,9 +123,7 @@ // CHECK-LABEL: define i64 @test_vdupd_laneq_s64(<2 x i64> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1 // CHECK: ret i64 [[VGETQ_LANE]] int64_t test_vdupd_laneq_s64(int64x2_t a) { return vdupd_laneq_s64(a, 1); @@ -165,9 +139,7 @@ // CHECK-LABEL: define i16 @test_vduph_laneq_u16(<8 x i16> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7 // CHECK: ret i16 [[VGETQ_LANE]] uint16_t test_vduph_laneq_u16(uint16x8_t a) { return vduph_laneq_u16(a, 7); @@ -175,9 +147,7 @@ // CHECK-LABEL: define i32 @test_vdups_laneq_u32(<4 x i32> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3 // CHECK: ret i32 [[VGETQ_LANE]] uint32_t test_vdups_laneq_u32(uint32x4_t a) { return vdups_laneq_u32(a, 3); @@ -185,9 +155,7 @@ // CHECK-LABEL: define i64 @test_vdupd_laneq_u64(<2 x i64> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1 // CHECK: ret i64 [[VGETQ_LANE]] uint64_t test_vdupd_laneq_u64(uint64x2_t a) { return vdupd_laneq_u64(a, 1); @@ -201,9 +169,7 @@ } // CHECK-LABEL: define i16 @test_vduph_lane_p16(<4 x i16> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3 // CHECK: ret i16 [[VGET_LANE]] poly16_t test_vduph_lane_p16(poly16x4_t a) { return vduph_lane_p16(a, 3); @@ -217,9 +183,7 @@ } // CHECK-LABEL: define i16 @test_vduph_laneq_p16(<8 x i16> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7 // CHECK: ret i16 [[VGETQ_LANE]] poly16_t test_vduph_laneq_p16(poly16x8_t a) { return vduph_laneq_p16(a, 7); diff --git a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c --- a/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c +++ b/clang/test/CodeGen/aarch64-neon-scalar-x-indexed-elem.c @@ -7,9 +7,7 @@ // CHECK-LABEL: define float @test_vmuls_lane_f32(float %a, <2 x float> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1 // CHECK: [[MUL:%.*]] = fmul float %a, [[VGET_LANE]] // CHECK: ret float [[MUL]] float32_t test_vmuls_lane_f32(float32_t a, float32x2_t b) { @@ -17,9 +15,7 @@ } // CHECK-LABEL: define double @test_vmuld_lane_f64(double %a, <1 x double> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0 // CHECK: [[MUL:%.*]] = fmul double %a, [[VGET_LANE]] // CHECK: ret double [[MUL]] float64_t test_vmuld_lane_f64(float64_t a, float64x1_t b) { @@ -27,9 +23,7 @@ } // CHECK-LABEL: define float @test_vmuls_laneq_f32(float %a, <4 x float> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3 // CHECK: [[MUL:%.*]] = fmul float %a, [[VGETQ_LANE]] // CHECK: ret float [[MUL]] float32_t test_vmuls_laneq_f32(float32_t a, float32x4_t b) { @@ -37,9 +31,7 @@ } // CHECK-LABEL: define double @test_vmuld_laneq_f64(double %a, <2 x double> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1 // CHECK: [[MUL:%.*]] = fmul double %a, [[VGETQ_LANE]] // CHECK: ret double [[MUL]] float64_t test_vmuld_laneq_f64(float64_t a, float64x2_t b) { @@ -56,9 +48,7 @@ } // CHECK-LABEL: define float @test_vmulxs_lane_f32(float %a, <2 x float> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %b, i32 1 // CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGET_LANE]]) // CHECK: ret float [[VMULXS_F32_I]] float32_t test_vmulxs_lane_f32(float32_t a, float32x2_t b) { @@ -66,9 +56,7 @@ } // CHECK-LABEL: define float @test_vmulxs_laneq_f32(float %a, <4 x float> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %b, i32 3 // CHECK: [[VMULXS_F32_I:%.*]] = call float @llvm.aarch64.neon.fmulx.f32(float %a, float [[VGETQ_LANE]]) // CHECK: ret float [[VMULXS_F32_I]] float32_t test_vmulxs_laneq_f32(float32_t a, float32x4_t b) { @@ -76,9 +64,7 @@ } // CHECK-LABEL: define double @test_vmulxd_lane_f64(double %a, <1 x double> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %b, i32 0 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGET_LANE]]) // CHECK: ret double [[VMULXD_F64_I]] float64_t test_vmulxd_lane_f64(float64_t a, float64x1_t b) { @@ -86,9 +72,7 @@ } // CHECK-LABEL: define double @test_vmulxd_laneq_f64(double %a, <2 x double> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double %a, double [[VGETQ_LANE]]) // CHECK: ret double [[VMULXD_F64_I]] float64_t test_vmulxd_laneq_f64(float64_t a, float64x2_t b) { @@ -96,16 +80,10 @@ } // CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64(<1 x double> %a, <1 x double> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 -// CHECK: [[TMP2:%.*]] = bitcast <1 x double> %b to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> -// CHECK: [[VGET_LANE6:%.*]] = extractelement <1 x double> [[TMP3]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0 +// CHECK: [[VGET_LANE6:%.*]] = extractelement <1 x double> %b, i32 0 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE6]]) -// CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0 // CHECK: ret <1 x double> [[VSET_LANE]] float64x1_t test_vmulx_lane_f64(float64x1_t a, float64x1_t b) { return vmulx_lane_f64(a, b, 0); @@ -113,32 +91,20 @@ // CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_0(<1 x double> %a, <2 x double> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 -// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 0 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) -// CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0 // CHECK: ret <1 x double> [[VSET_LANE]] float64x1_t test_vmulx_laneq_f64_0(float64x1_t a, float64x2_t b) { return vmulx_laneq_f64(a, b, 0); } // CHECK-LABEL: define <1 x double> @test_vmulx_laneq_f64_1(<1 x double> %a, <2 x double> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 -// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %b to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> %a, i32 0 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %b, i32 1 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) -// CHECK: [[TMP4:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP5]], double [[VMULXD_F64_I]], i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> %a, double [[VMULXD_F64_I]], i32 0 // CHECK: ret <1 x double> [[VSET_LANE]] float64x1_t test_vmulx_laneq_f64_1(float64x1_t a, float64x2_t b) { return vmulx_laneq_f64(a, b, 1); @@ -146,9 +112,7 @@ // CHECK-LABEL: define float @test_vfmas_lane_f32(float %a, float %b, <2 x float> %c) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %c to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1 // CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float %b, float [[EXTRACT]], float %a) // CHECK: ret float [[TMP2]] float32_t test_vfmas_lane_f32(float32_t a, float32_t b, float32x2_t c) { @@ -156,9 +120,7 @@ } // CHECK-LABEL: define double @test_vfmad_lane_f64(double %a, double %b, <1 x double> %c) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x double> %c to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x double> -// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 +// CHECK: [[EXTRACT:%.*]] = extractelement <1 x double> %c, i32 0 // CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a) // CHECK: ret double [[TMP2]] float64_t test_vfmad_lane_f64(float64_t a, float64_t b, float64x1_t c) { @@ -166,9 +128,7 @@ } // CHECK-LABEL: define double @test_vfmad_laneq_f64(double %a, double %b, <2 x double> %c) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %c to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 +// CHECK: [[EXTRACT:%.*]] = extractelement <2 x double> %c, i32 1 // CHECK: [[TMP2:%.*]] = call double @llvm.fma.f64(double %b, double [[EXTRACT]], double %a) // CHECK: ret double [[TMP2]] float64_t test_vfmad_laneq_f64(float64_t a, float64_t b, float64x2_t c) { @@ -177,9 +137,7 @@ // CHECK-LABEL: define float @test_vfmss_lane_f32(float %a, float %b, <2 x float> %c) #0 { // CHECK: [[SUB:%.*]] = fsub float -0.000000e+00, %b -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %c to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +// CHECK: [[EXTRACT:%.*]] = extractelement <2 x float> %c, i32 1 // CHECK: [[TMP2:%.*]] = call float @llvm.fma.f32(float [[SUB]], float [[EXTRACT]], float %a) // CHECK: ret float [[TMP2]] float32_t test_vfmss_lane_f32(float32_t a, float32_t b, float32x2_t c) { @@ -247,9 +205,7 @@ } // CHECK-LABEL: define i32 @test_vqdmullh_lane_s16(i16 %a, <4 x i16> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 // CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) @@ -260,9 +216,7 @@ } // CHECK-LABEL: define i64 @test_vqdmulls_lane_s32(i32 %a, <2 x i32> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1 // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i64 [[VQDMULLS_S32_I]] int64_t test_vqdmulls_lane_s32(int32_t a, int32x2_t b) { @@ -270,9 +224,7 @@ } // CHECK-LABEL: define i32 @test_vqdmullh_laneq_s16(i16 %a, <8 x i16> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 // CHECK: [[VQDMULLH_S16_I:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) @@ -283,9 +235,7 @@ } // CHECK-LABEL: define i64 @test_vqdmulls_laneq_s32(i32 %a, <4 x i32> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3 // CHECK: [[VQDMULLS_S32_I:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i64 [[VQDMULLS_S32_I]] int64_t test_vqdmulls_laneq_s32(int32_t a, int32x4_t b) { @@ -293,9 +243,7 @@ } // CHECK-LABEL: define i16 @test_vqdmulhh_lane_s16(i16 %a, <4 x i16> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 // CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) @@ -306,9 +254,7 @@ } // CHECK-LABEL: define i32 @test_vqdmulhs_lane_s32(i32 %a, <2 x i32> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1 // CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i32 [[VQDMULHS_S32_I]] int32_t test_vqdmulhs_lane_s32(int32_t a, int32x2_t b) { @@ -317,9 +263,7 @@ // CHECK-LABEL: define i16 @test_vqdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 // CHECK: [[VQDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) @@ -331,9 +275,7 @@ // CHECK-LABEL: define i32 @test_vqdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3 // CHECK: [[VQDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqdmulh.i32(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i32 [[VQDMULHS_S32_I]] int32_t test_vqdmulhs_laneq_s32(int32_t a, int32x4_t b) { @@ -341,9 +283,7 @@ } // CHECK-LABEL: define i16 @test_vqrdmulhh_lane_s16(i16 %a, <4 x i16> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %b, i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGET_LANE]], i64 0 // CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) @@ -354,9 +294,7 @@ } // CHECK-LABEL: define i32 @test_vqrdmulhs_lane_s32(i32 %a, <2 x i32> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %b, i32 1 // CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGET_LANE]]) // CHECK: ret i32 [[VQRDMULHS_S32_I]] int32_t test_vqrdmulhs_lane_s32(int32_t a, int32x2_t b) { @@ -365,9 +303,7 @@ // CHECK-LABEL: define i16 @test_vqrdmulhh_laneq_s16(i16 %a, <8 x i16> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %b, i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %a, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[VGETQ_LANE]], i64 0 // CHECK: [[VQRDMULHH_S16_I:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqrdmulh.v4i16(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) @@ -379,9 +315,7 @@ // CHECK-LABEL: define i32 @test_vqrdmulhs_laneq_s32(i32 %a, <4 x i32> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %b, i32 3 // CHECK: [[VQRDMULHS_S32_I:%.*]] = call i32 @llvm.aarch64.neon.sqrdmulh.i32(i32 %a, i32 [[VGETQ_LANE]]) // CHECK: ret i32 [[VQRDMULHS_S32_I]] int32_t test_vqrdmulhs_laneq_s32(int32_t a, int32x4_t b) { @@ -389,9 +323,7 @@ } // CHECK-LABEL: define i32 @test_vqdmlalh_lane_s16(i32 %a, i16 %b, <4 x i16> %c) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0 // CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) @@ -403,9 +335,7 @@ } // CHECK-LABEL: define i64 @test_vqdmlals_lane_s32(i64 %a, i32 %b, <2 x i32> %c) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +// CHECK: [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1 // CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]]) // CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]]) // CHECK: ret i64 [[VQDMLXL1]] @@ -414,9 +344,7 @@ } // CHECK-LABEL: define i32 @test_vqdmlalh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0 // CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) @@ -428,9 +356,7 @@ } // CHECK-LABEL: define i64 @test_vqdmlals_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +// CHECK: [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3 // CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]]) // CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqadd.i64(i64 %a, i64 [[VQDMLXL]]) // CHECK: ret i64 [[VQDMLXL1]] @@ -439,9 +365,7 @@ } // CHECK-LABEL: define i32 @test_vqdmlslh_lane_s16(i32 %a, i16 %b, <4 x i16> %c) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %c to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[LANE:%.*]] = extractelement <4 x i16> %c, i32 3 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0 // CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) @@ -453,9 +377,7 @@ } // CHECK-LABEL: define i64 @test_vqdmlsls_lane_s32(i64 %a, i32 %b, <2 x i32> %c) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %c to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +// CHECK: [[LANE:%.*]] = extractelement <2 x i32> %c, i32 1 // CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]]) // CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]]) // CHECK: ret i64 [[VQDMLXL1]] @@ -464,9 +386,7 @@ } // CHECK-LABEL: define i32 @test_vqdmlslh_laneq_s16(i32 %a, i16 %b, <8 x i16> %c) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %c to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[LANE:%.*]] = extractelement <8 x i16> %c, i32 7 // CHECK: [[TMP2:%.*]] = insertelement <4 x i16> undef, i16 %b, i64 0 // CHECK: [[TMP3:%.*]] = insertelement <4 x i16> undef, i16 [[LANE]], i64 0 // CHECK: [[VQDMLXL:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> [[TMP2]], <4 x i16> [[TMP3]]) @@ -478,9 +398,7 @@ } // CHECK-LABEL: define i64 @test_vqdmlsls_laneq_s32(i64 %a, i32 %b, <4 x i32> %c) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %c to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +// CHECK: [[LANE:%.*]] = extractelement <4 x i32> %c, i32 3 // CHECK: [[VQDMLXL:%.*]] = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %b, i32 [[LANE]]) // CHECK: [[VQDMLXL1:%.*]] = call i64 @llvm.aarch64.neon.sqsub.i64(i64 %a, i64 [[VQDMLXL]]) // CHECK: ret i64 [[VQDMLXL1]] @@ -491,16 +409,10 @@ // CHECK-LABEL: define <1 x double> @test_vmulx_lane_f64_0() #0 { // CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double> // CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double> -// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP3]], i32 0 -// CHECK: [[TMP4:%.*]] = bitcast <1 x double> [[TMP1]] to <8 x i8> -// CHECK: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP4]] to <1 x double> -// CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP5]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0 +// CHECK: [[VGET_LANE7:%.*]] = extractelement <1 x double> [[TMP1]], i32 0 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGET_LANE7]]) -// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8> -// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP7]], double [[VMULXD_F64_I]], i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0 // CHECK: ret <1 x double> [[VSET_LANE]] float64x1_t test_vmulx_lane_f64_0() { float64x1_t arg1; @@ -517,16 +429,10 @@ // CHECK: [[TMP0:%.*]] = bitcast i64 4599917171378402754 to <1 x double> // CHECK: [[TMP1:%.*]] = bitcast i64 4606655882138939123 to <1 x double> // CHECK: [[SHUFFLE_I:%.*]] = shufflevector <1 x double> [[TMP0]], <1 x double> [[TMP1]], <2 x i32> -// CHECK: [[TMP2:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x double> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP3]], i32 0 -// CHECK: [[TMP4:%.*]] = bitcast <2 x double> [[SHUFFLE_I]] to <16 x i8> -// CHECK: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP4]] to <2 x double> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP5]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x double> [[TMP0]], i32 0 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[SHUFFLE_I]], i32 1 // CHECK: [[VMULXD_F64_I:%.*]] = call double @llvm.aarch64.neon.fmulx.f64(double [[VGET_LANE]], double [[VGETQ_LANE]]) -// CHECK: [[TMP6:%.*]] = bitcast <1 x double> [[TMP0]] to <8 x i8> -// CHECK: [[TMP7:%.*]] = bitcast <8 x i8> [[TMP6]] to <1 x double> -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP7]], double [[VMULXD_F64_I]], i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x double> [[TMP0]], double [[VMULXD_F64_I]], i32 0 // CHECK: ret <1 x double> [[VSET_LANE]] float64x1_t test_vmulx_laneq_f64_2() { float64x1_t arg1; diff --git a/clang/test/CodeGen/aarch64-neon-vget.c b/clang/test/CodeGen/aarch64-neon-vget.c --- a/clang/test/CodeGen/aarch64-neon-vget.c +++ b/clang/test/CodeGen/aarch64-neon-vget.c @@ -12,18 +12,14 @@ } // CHECK-LABEL: define i16 @test_vget_lane_u16(<4 x i16> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3 // CHECK: ret i16 [[VGET_LANE]] uint16_t test_vget_lane_u16(uint16x4_t a) { return vget_lane_u16(a, 3); } // CHECK-LABEL: define i32 @test_vget_lane_u32(<2 x i32> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1 // CHECK: ret i32 [[VGET_LANE]] uint32_t test_vget_lane_u32(uint32x2_t a) { return vget_lane_u32(a, 1); @@ -37,18 +33,14 @@ } // CHECK-LABEL: define i16 @test_vget_lane_s16(<4 x i16> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3 // CHECK: ret i16 [[VGET_LANE]] int16_t test_vget_lane_s16(int16x4_t a) { return vget_lane_s16(a, 3); } // CHECK-LABEL: define i32 @test_vget_lane_s32(<2 x i32> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1 // CHECK: ret i32 [[VGET_LANE]] int32_t test_vget_lane_s32(int32x2_t a) { return vget_lane_s32(a, 1); @@ -62,18 +54,14 @@ } // CHECK-LABEL: define i16 @test_vget_lane_p16(<4 x i16> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3 // CHECK: ret i16 [[VGET_LANE]] poly16_t test_vget_lane_p16(poly16x4_t a) { return vget_lane_p16(a, 3); } // CHECK-LABEL: define float @test_vget_lane_f32(<2 x float> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1 // CHECK: ret float [[VGET_LANE]] float32_t test_vget_lane_f32(float32x2_t a) { return vget_lane_f32(a, 1); @@ -85,9 +73,7 @@ // CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8 // CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>* // CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8 -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1 // CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2 // CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half* // CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2 @@ -105,18 +91,14 @@ } // CHECK-LABEL: define i16 @test_vgetq_lane_u16(<8 x i16> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7 // CHECK: ret i16 [[VGETQ_LANE]] uint16_t test_vgetq_lane_u16(uint16x8_t a) { return vgetq_lane_u16(a, 7); } // CHECK-LABEL: define i32 @test_vgetq_lane_u32(<4 x i32> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3 // CHECK: ret i32 [[VGETQ_LANE]] uint32_t test_vgetq_lane_u32(uint32x4_t a) { return vgetq_lane_u32(a, 3); @@ -130,18 +112,14 @@ } // CHECK-LABEL: define i16 @test_vgetq_lane_s16(<8 x i16> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7 // CHECK: ret i16 [[VGETQ_LANE]] int16_t test_vgetq_lane_s16(int16x8_t a) { return vgetq_lane_s16(a, 7); } // CHECK-LABEL: define i32 @test_vgetq_lane_s32(<4 x i32> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a, i32 3 // CHECK: ret i32 [[VGETQ_LANE]] int32_t test_vgetq_lane_s32(int32x4_t a) { return vgetq_lane_s32(a, 3); @@ -155,18 +133,14 @@ } // CHECK-LABEL: define i16 @test_vgetq_lane_p16(<8 x i16> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a, i32 7 // CHECK: ret i16 [[VGETQ_LANE]] poly16_t test_vgetq_lane_p16(poly16x8_t a) { return vgetq_lane_p16(a, 7); } // CHECK-LABEL: define float @test_vgetq_lane_f32(<4 x float> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a, i32 3 // CHECK: ret float [[VGETQ_LANE]] float32_t test_vgetq_lane_f32(float32x4_t a) { return vgetq_lane_f32(a, 3); @@ -178,9 +152,7 @@ // CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16 // CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>* // CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16 -// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3 // CHECK: store i16 [[VGETQ_LANE]], i16* [[__REINT1_244]], align 2 // CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half* // CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2 @@ -191,36 +163,28 @@ } // CHECK-LABEL: define i64 @test_vget_lane_s64(<1 x i64> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0 // CHECK: ret i64 [[VGET_LANE]] int64_t test_vget_lane_s64(int64x1_t a) { return vget_lane_s64(a, 0); } // CHECK-LABEL: define i64 @test_vget_lane_u64(<1 x i64> %a) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0 // CHECK: ret i64 [[VGET_LANE]] uint64_t test_vget_lane_u64(uint64x1_t a) { return vget_lane_u64(a, 0); } // CHECK-LABEL: define i64 @test_vgetq_lane_s64(<2 x i64> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1 // CHECK: ret i64 [[VGETQ_LANE]] int64_t test_vgetq_lane_s64(int64x2_t a) { return vgetq_lane_s64(a, 1); } // CHECK-LABEL: define i64 @test_vgetq_lane_u64(<2 x i64> %a) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a, i32 1 // CHECK: ret i64 [[VGETQ_LANE]] uint64_t test_vgetq_lane_u64(uint64x2_t a) { return vgetq_lane_u64(a, 1); @@ -235,18 +199,14 @@ } // CHECK-LABEL: define <4 x i16> @test_vset_lane_u16(i16 %a, <4 x i16> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3 // CHECK: ret <4 x i16> [[VSET_LANE]] uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) { return vset_lane_u16(a, b, 3); } // CHECK-LABEL: define <2 x i32> @test_vset_lane_u32(i32 %a, <2 x i32> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1 // CHECK: ret <2 x i32> [[VSET_LANE]] uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) { return vset_lane_u32(a, b, 1); @@ -260,18 +220,14 @@ } // CHECK-LABEL: define <4 x i16> @test_vset_lane_s16(i16 %a, <4 x i16> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3 // CHECK: ret <4 x i16> [[VSET_LANE]] int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) { return vset_lane_s16(a, b, 3); } // CHECK-LABEL: define <2 x i32> @test_vset_lane_s32(i32 %a, <2 x i32> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1 // CHECK: ret <2 x i32> [[VSET_LANE]] int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) { return vset_lane_s32(a, b, 1); @@ -285,18 +241,14 @@ } // CHECK-LABEL: define <4 x i16> @test_vset_lane_p16(i16 %a, <4 x i16> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3 // CHECK: ret <4 x i16> [[VSET_LANE]] poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) { return vset_lane_p16(a, b, 3); } // CHECK-LABEL: define <2 x float> @test_vset_lane_f32(float %a, <2 x float> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1 // CHECK: ret <2 x float> [[VSET_LANE]] float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) { return vset_lane_f32(a, b, 1); @@ -313,9 +265,7 @@ // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 // CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>* // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8 -// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> -// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 3 // CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>* // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8 @@ -332,18 +282,14 @@ } // CHECK-LABEL: define <8 x i16> @test_vsetq_lane_u16(i16 %a, <8 x i16> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7 // CHECK: ret <8 x i16> [[VSET_LANE]] uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) { return vsetq_lane_u16(a, b, 7); } // CHECK-LABEL: define <4 x i32> @test_vsetq_lane_u32(i32 %a, <4 x i32> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3 // CHECK: ret <4 x i32> [[VSET_LANE]] uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) { return vsetq_lane_u32(a, b, 3); @@ -357,18 +303,14 @@ } // CHECK-LABEL: define <8 x i16> @test_vsetq_lane_s16(i16 %a, <8 x i16> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7 // CHECK: ret <8 x i16> [[VSET_LANE]] int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) { return vsetq_lane_s16(a, b, 7); } // CHECK-LABEL: define <4 x i32> @test_vsetq_lane_s32(i32 %a, <4 x i32> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3 // CHECK: ret <4 x i32> [[VSET_LANE]] int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) { return vsetq_lane_s32(a, b, 3); @@ -382,18 +324,14 @@ } // CHECK-LABEL: define <8 x i16> @test_vsetq_lane_p16(i16 %a, <8 x i16> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7 // CHECK: ret <8 x i16> [[VSET_LANE]] poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) { return vsetq_lane_p16(a, b, 7); } // CHECK-LABEL: define <4 x float> @test_vsetq_lane_f32(float %a, <4 x float> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3 // CHECK: ret <4 x float> [[VSET_LANE]] float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) { return vsetq_lane_f32(a, b, 3); @@ -410,9 +348,7 @@ // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 // CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>* // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16 -// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> -// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 7 +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 7 // CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>* // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16 @@ -422,36 +358,28 @@ } // CHECK-LABEL: define <1 x i64> @test_vset_lane_s64(i64 %a, <1 x i64> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0 // CHECK: ret <1 x i64> [[VSET_LANE]] int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) { return vset_lane_s64(a, b, 0); } // CHECK-LABEL: define <1 x i64> @test_vset_lane_u64(i64 %a, <1 x i64> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0 // CHECK: ret <1 x i64> [[VSET_LANE]] uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) { return vset_lane_u64(a, b, 0); } // CHECK-LABEL: define <2 x i64> @test_vsetq_lane_s64(i64 %a, <2 x i64> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1 // CHECK: ret <2 x i64> [[VSET_LANE]] int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) { return vsetq_lane_s64(a, b, 1); } // CHECK-LABEL: define <2 x i64> @test_vsetq_lane_u64(i64 %a, <2 x i64> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1 // CHECK: ret <2 x i64> [[VSET_LANE]] uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) { return vsetq_lane_u64(a, b, 1); diff --git a/clang/test/CodeGen/aarch64-poly64.c b/clang/test/CodeGen/aarch64-poly64.c --- a/clang/test/CodeGen/aarch64-poly64.c +++ b/clang/test/CodeGen/aarch64-poly64.c @@ -61,48 +61,36 @@ } // CHECK-LABEL: define i64 @test_vget_lane_p64(<1 x i64> %v) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %v, i32 0 // CHECK: ret i64 [[VGET_LANE]] poly64_t test_vget_lane_p64(poly64x1_t v) { return vget_lane_p64(v, 0); } // CHECK-LABEL: define i64 @test_vgetq_lane_p64(<2 x i64> %v) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %v, i32 1 // CHECK: ret i64 [[VGETQ_LANE]] poly64_t test_vgetq_lane_p64(poly64x2_t v) { return vgetq_lane_p64(v, 1); } // CHECK-LABEL: define <1 x i64> @test_vset_lane_p64(i64 %a, <1 x i64> %v) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %v to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %v, i64 %a, i32 0 // CHECK: ret <1 x i64> [[VSET_LANE]] poly64x1_t test_vset_lane_p64(poly64_t a, poly64x1_t v) { return vset_lane_p64(a, v, 0); } // CHECK-LABEL: define <2 x i64> @test_vsetq_lane_p64(i64 %a, <2 x i64> %v) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %v to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %v, i64 %a, i32 1 // CHECK: ret <2 x i64> [[VSET_LANE]] poly64x2_t test_vsetq_lane_p64(poly64_t a, poly64x2_t v) { return vsetq_lane_p64(a, v, 1); } // CHECK-LABEL: define <1 x i64> @test_vcopy_lane_p64(<1 x i64> %a, <1 x i64> %b) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 -// CHECK: [[TMP2:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <1 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %a, i64 [[VGET_LANE]], i32 0 // CHECK: ret <1 x i64> [[VSET_LANE]] poly64x1_t test_vcopy_lane_p64(poly64x1_t a, poly64x1_t b) { return vcopy_lane_p64(a, 0, b, 0); @@ -110,24 +98,16 @@ } // CHECK-LABEL: define <2 x i64> @test_vcopyq_lane_p64(<2 x i64> %a, <1 x i64> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 -// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGET_LANE]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %b, i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGET_LANE]], i32 1 // CHECK: ret <2 x i64> [[VSET_LANE]] poly64x2_t test_vcopyq_lane_p64(poly64x2_t a, poly64x1_t b) { return vcopyq_lane_p64(a, 1, b, 0); } // CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_p64(<2 x i64> %a, <2 x i64> %b) #1 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 -// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 1 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %b, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a, i64 [[VGETQ_LANE]], i32 1 // CHECK: ret <2 x i64> [[VSET_LANE]] poly64x2_t test_vcopyq_laneq_p64(poly64x2_t a, poly64x2_t b) { return vcopyq_laneq_p64(a, 1, b, 1); diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c --- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c @@ -960,9 +960,7 @@ } // CHECK-LABEL: test_vfmah_lane_f16 -// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %c to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3 +// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3 // CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a) // CHECK: ret half [[FMA]] float16_t test_vfmah_lane_f16(float16_t a, float16_t b, float16x4_t c) { @@ -970,9 +968,7 @@ } // CHECK-LABEL: test_vfmah_laneq_f16 -// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %c to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7 +// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7 // CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half %b, half [[EXTR]], half %a) // CHECK: ret half [[FMA]] float16_t test_vfmah_laneq_f16(float16_t a, float16_t b, float16x8_t c) { @@ -1071,9 +1067,7 @@ // CHECK: [[TMP0:%.*]] = fpext half %b to float // CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]] // CHECK: [[SUB:%.*]] = fptrunc float [[TMP1]] to half -// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x half> -// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP3]], i32 3 +// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %c, i32 3 // CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a) // CHECK: ret half [[FMA]] float16_t test_vfmsh_lane_f16(float16_t a, float16_t b, float16x4_t c) { @@ -1084,9 +1078,7 @@ // CHECK: [[TMP0:%.*]] = fpext half %b to float // CHECK: [[TMP1:%.*]] = fsub float -0.000000e+00, [[TMP0]] // CHECK: [[SUB:%.*]] = fptrunc float [[TMP1]] to half -// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x half> -// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP3]], i32 7 +// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %c, i32 7 // CHECK: [[FMA:%.*]] = call half @llvm.fma.f16(half [[SUB]], half [[EXTR]], half %a) // CHECK: ret half [[FMA]] float16_t test_vfmsh_laneq_f16(float16_t a, float16_t b, float16x8_t c) { @@ -1231,9 +1223,7 @@ } // CHECK-LABEL: test_vmulxh_lane_f16 -// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK: [[EXTR:%.*]] = extractelement <4 x half> [[TMP1]], i32 3 +// CHECK: [[EXTR:%.*]] = extractelement <4 x half> %b, i32 3 // CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]] // CHECK: ret half [[MULX]] float16_t test_vmulxh_lane_f16(float16_t a, float16x4_t b) { @@ -1241,9 +1231,7 @@ } // CHECK-LABEL: test_vmulxh_laneq_f16 -// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK: [[EXTR:%.*]] = extractelement <8 x half> [[TMP1]], i32 7 +// CHECK: [[EXTR:%.*]] = extractelement <8 x half> %b, i32 7 // CHECK: [[MULX:%.*]] = call half @llvm.aarch64.neon.fmulx.f16(half %a, half [[EXTR]]) // CHECK: ret half [[MULX]] float16_t test_vmulxh_laneq_f16(float16_t a, float16x8_t b) { diff --git a/clang/test/CodeGen/arm64-lanes.c b/clang/test/CodeGen/arm64-lanes.c --- a/clang/test/CodeGen/arm64-lanes.c +++ b/clang/test/CodeGen/arm64-lanes.c @@ -9,7 +9,7 @@ // CHECK: extractelement <8 x i8> %src, i32 2 // CHECK-BE-LABEL: @test_vdupb_lane_s8 - // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> {{.*}}, <8 x i32> + // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> %src, <8 x i8> %src, <8 x i32> // CHECK-BE: extractelement <8 x i8> [[REV]], i32 2 } @@ -19,109 +19,83 @@ // CHECK: extractelement <8 x i8> %src, i32 2 // CHECK-BE-LABEL: @test_vdupb_lane_u8 - // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> {{.*}}, <8 x i32> + // CHECK-BE: [[REV:%.*]] = shufflevector <8 x i8> %src, <8 x i8> %src, <8 x i32> // CHECK-BE: extractelement <8 x i8> [[REV]], i32 2 } int16_t test_vduph_lane_s16(int16x4_t src) { return vduph_lane_s16(src, 2); // CHECK-LABEL: @test_vduph_lane_s16 - // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %src to [[TYPE:.*]] - // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16> - // CHECK: extractelement <4 x i16> [[TMP2]], i32 2 + // CHECK: extractelement <4 x i16> %src, i32 2 // CHECK-BE-LABEL: @test_vduph_lane_s16 - // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i32> - // CHECK-BE: [[TMP1:%.*]] = bitcast <4 x i16> [[REV]] to [[TYPE:.*]] - // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16> - // CHECK-BE: extractelement <4 x i16> [[TMP2]], i32 2 + // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> %src, <4 x i16> %src, <4 x i32> + // CHECK-BE: extractelement <4 x i16> [[REV]], i32 2 } uint16_t test_vduph_lane_u16(uint16x4_t src) { return vduph_lane_u16(src, 2); // CHECK-LABEL: @test_vduph_lane_u16 - // CHECK: [[TMP1:%.*]] = bitcast <4 x i16> %src to [[TYPE:.*]] - // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16> - // CHECK: extractelement <4 x i16> [[TMP2]], i32 2 + // CHECK: extractelement <4 x i16> %src, i32 2 // CHECK-BE-LABEL: @test_vduph_lane_u16 - // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> {{.*}}, <4 x i32> - // CHECK-BE: [[TMP1:%.*]] = bitcast <4 x i16> [[REV]] to [[TYPE:.*]] - // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <4 x i16> - // CHECK-BE: extractelement <4 x i16> [[TMP2]], i32 2 + // CHECK-BE: [[REV:%.*]] = shufflevector <4 x i16> %src, <4 x i16> %src, <4 x i32> + // CHECK-BE: extractelement <4 x i16> [[REV]], i32 2 } int32_t test_vdups_lane_s32(int32x2_t src) { return vdups_lane_s32(src, 0); // CHECK-LABEL: @test_vdups_lane_s32 - // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %src to [[TYPE:.*]] - // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32> - // CHECK: extractelement <2 x i32> [[TMP2]], i32 0 + // CHECK: extractelement <2 x i32> %src, i32 0 // CHECK-BE-LABEL: @test_vdups_lane_s32 - // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32> - // CHECK-BE: [[TMP1:%.*]] = bitcast <2 x i32> [[REV]] to [[TYPE:.*]] - // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32> - // CHECK-BE: extractelement <2 x i32> [[TMP2]], i32 0 + // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> %src, <2 x i32> %src, <2 x i32> + // CHECK-BE: extractelement <2 x i32> [[REV]], i32 0 } uint32_t test_vdups_lane_u32(uint32x2_t src) { return vdups_lane_u32(src, 0); // CHECK-LABEL: @test_vdups_lane_u32 - // CHECK: [[TMP1:%.*]] = bitcast <2 x i32> %src to [[TYPE:.*]] - // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32> - // CHECK: extractelement <2 x i32> [[TMP2]], i32 0 + // CHECK: extractelement <2 x i32> %src, i32 0 // CHECK-BE-LABEL: @test_vdups_lane_u32 - // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> {{.*}}, <2 x i32> - // CHECK-BE: [[TMP1:%.*]] = bitcast <2 x i32> [[REV]] to [[TYPE:.*]] - // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x i32> - // CHECK-BE: extractelement <2 x i32> [[TMP2]], i32 0 + // CHECK-BE: [[REV:%.*]] = shufflevector <2 x i32> %src, <2 x i32> %src, <2 x i32> + // CHECK-BE: extractelement <2 x i32> [[REV]], i32 0 } float32_t test_vdups_lane_f32(float32x2_t src) { return vdups_lane_f32(src, 0); // CHECK-LABEL: @test_vdups_lane_f32 - // CHECK: [[TMP1:%.*]] = bitcast <2 x float> %src to [[TYPE:.*]] - // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x float> - // CHECK: extractelement <2 x float> [[TMP2]], i32 0 + // CHECK: extractelement <2 x float> %src, i32 0 // CHECK-BE-LABEL: @test_vdups_lane_f32 - // CHECK-BE: [[REV:%.*]] = shufflevector <2 x float> {{.*}}, <2 x i32> - // CHECK-BE: [[TMP1:%.*]] = bitcast <2 x float> [[REV]] to [[TYPE:.*]] - // CHECK-BE: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <2 x float> - // CHECK-BE: extractelement <2 x float> [[TMP2]], i32 0 + // CHECK-BE: [[REV:%.*]] = shufflevector <2 x float> %src, <2 x float> %src, <2 x i32> + // CHECK-BE: extractelement <2 x float> [[REV]], i32 0 } int64_t test_vdupd_lane_s64(int64x1_t src) { return vdupd_lane_s64(src, 0); // CHECK-LABEL: @test_vdupd_lane_s64 - // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %src to [[TYPE:.*]] - // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x i64> - // CHECK: extractelement <1 x i64> [[TMP2]], i32 0 + // CHECK: extractelement <1 x i64> %src, i32 0 // CHECK-BE-LABEL: @test_vdupd_lane_s64 - // CHECK-BE: extractelement <1 x i64> {{.*}}, i32 0 + // CHECK-BE: extractelement <1 x i64> %src, i32 0 } uint64_t test_vdupd_lane_u64(uint64x1_t src) { return vdupd_lane_u64(src, 0); // CHECK-LABEL: @test_vdupd_lane_u64 - // CHECK: [[TMP1:%.*]] = bitcast <1 x i64> %src to [[TYPE:.*]] - // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x i64> - // CHECK: extractelement <1 x i64> [[TMP2]], i32 0 + // CHECK: extractelement <1 x i64> %src, i32 0 // CHECK-BE-LABEL: @test_vdupd_lane_u64 - // CHECK-BE: extractelement <1 x i64> {{.*}}, i32 0 + // CHECK-BE: extractelement <1 x i64> %src, i32 0 } float64_t test_vdupd_lane_f64(float64x1_t src) { return vdupd_lane_f64(src, 0); // CHECK-LABEL: @test_vdupd_lane_f64 - // CHECK: [[TMP1:%.*]] = bitcast <1 x double> %src to [[TYPE:.*]] - // CHECK: [[TMP2:%.*]] = bitcast [[TYPE]] [[TMP1]] to <1 x double> - // CHECK: extractelement <1 x double> [[TMP2]], i32 0 + // CHECK: extractelement <1 x double> %src, i32 0 // CHECK-BE-LABEL: @test_vdupd_lane_f64 - // CHECK-BE: extractelement <1 x double> {{.*}}, i32 0 + // CHECK-BE: extractelement <1 x double> %src, i32 0 } diff --git a/clang/test/CodeGen/arm64_vcopy.c b/clang/test/CodeGen/arm64_vcopy.c --- a/clang/test/CodeGen/arm64_vcopy.c +++ b/clang/test/CodeGen/arm64_vcopy.c @@ -22,12 +22,8 @@ } // CHECK-LABEL: define <8 x i16> @test_vcopyq_laneq_s16(<8 x i16> %a1, <8 x i16> %a2) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a2 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 -// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %a1 to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[VGETQ_LANE]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a2, i32 7 +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %a1, i16 [[VGETQ_LANE]], i32 3 // CHECK: ret <8 x i16> [[VSET_LANE]] int16x8_t test_vcopyq_laneq_s16(int16x8_t a1, int16x8_t a2) { return vcopyq_laneq_s16(a1, (int64_t) 3, a2, (int64_t) 7); @@ -35,12 +31,8 @@ } // CHECK-LABEL: define <8 x i16> @test_vcopyq_laneq_u16(<8 x i16> %a1, <8 x i16> %a2) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a2 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 -// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> %a1 to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP3]], i16 [[VGETQ_LANE]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <8 x i16> %a2, i32 7 +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %a1, i16 [[VGETQ_LANE]], i32 3 // CHECK: ret <8 x i16> [[VSET_LANE]] uint16x8_t test_vcopyq_laneq_u16(uint16x8_t a1, uint16x8_t a2) { return vcopyq_laneq_u16(a1, (int64_t) 3, a2, (int64_t) 7); @@ -48,72 +40,48 @@ } // CHECK-LABEL: define <4 x i32> @test_vcopyq_laneq_s32(<4 x i32> %a1, <4 x i32> %a2) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a2 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 -// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %a1 to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[VGETQ_LANE]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a2, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %a1, i32 [[VGETQ_LANE]], i32 3 // CHECK: ret <4 x i32> [[VSET_LANE]] int32x4_t test_vcopyq_laneq_s32(int32x4_t a1, int32x4_t a2) { return vcopyq_laneq_s32(a1, (int64_t) 3, a2, (int64_t) 3); } // CHECK-LABEL: define <4 x i32> @test_vcopyq_laneq_u32(<4 x i32> %a1, <4 x i32> %a2) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a2 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 -// CHECK: [[TMP2:%.*]] = bitcast <4 x i32> %a1 to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[VGETQ_LANE]], i32 3 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x i32> %a2, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %a1, i32 [[VGETQ_LANE]], i32 3 // CHECK: ret <4 x i32> [[VSET_LANE]] uint32x4_t test_vcopyq_laneq_u32(uint32x4_t a1, uint32x4_t a2) { return vcopyq_laneq_u32(a1, (int64_t) 3, a2, (int64_t) 3); } // CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_s64(<2 x i64> %a1, <2 x i64> %a2) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a2 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 -// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a1 to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 0 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a2, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a1, i64 [[VGETQ_LANE]], i32 0 // CHECK: ret <2 x i64> [[VSET_LANE]] int64x2_t test_vcopyq_laneq_s64(int64x2_t a1, int64x2_t a2) { return vcopyq_laneq_s64(a1, (int64_t) 0, a2, (int64_t) 1); } // CHECK-LABEL: define <2 x i64> @test_vcopyq_laneq_u64(<2 x i64> %a1, <2 x i64> %a2) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a2 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 -// CHECK: [[TMP2:%.*]] = bitcast <2 x i64> %a1 to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[VGETQ_LANE]], i32 0 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x i64> %a2, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %a1, i64 [[VGETQ_LANE]], i32 0 // CHECK: ret <2 x i64> [[VSET_LANE]] uint64x2_t test_vcopyq_laneq_u64(uint64x2_t a1, uint64x2_t a2) { return vcopyq_laneq_u64(a1, (int64_t) 0, a2, (int64_t) 1); } // CHECK-LABEL: define <4 x float> @test_vcopyq_laneq_f32(<4 x float> %a1, <4 x float> %a2) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a2 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 -// CHECK: [[TMP2:%.*]] = bitcast <4 x float> %a1 to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP3]], float [[VGETQ_LANE]], i32 0 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <4 x float> %a2, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> %a1, float [[VGETQ_LANE]], i32 0 // CHECK: ret <4 x float> [[VSET_LANE]] float32x4_t test_vcopyq_laneq_f32(float32x4_t a1, float32x4_t a2) { return vcopyq_laneq_f32(a1, 0, a2, 3); } // CHECK-LABEL: define <2 x double> @test_vcopyq_laneq_f64(<2 x double> %a1, <2 x double> %a2) #0 { -// CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a2 to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x double> -// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> [[TMP1]], i32 1 -// CHECK: [[TMP2:%.*]] = bitcast <2 x double> %a1 to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x double> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x double> [[TMP3]], double [[VGETQ_LANE]], i32 0 +// CHECK: [[VGETQ_LANE:%.*]] = extractelement <2 x double> %a2, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x double> %a1, double [[VGETQ_LANE]], i32 0 // CHECK: ret <2 x double> [[VSET_LANE]] float64x2_t test_vcopyq_laneq_f64(float64x2_t a1, float64x2_t a2) { return vcopyq_laneq_f64(a1, 0, a2, 1); diff --git a/clang/test/CodeGen/arm_neon_intrinsics.c b/clang/test/CodeGen/arm_neon_intrinsics.c --- a/clang/test/CodeGen/arm_neon_intrinsics.c +++ b/clang/test/CodeGen/arm_neon_intrinsics.c @@ -3319,18 +3319,14 @@ } // CHECK-LABEL: @test_vget_lane_u16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3 // CHECK: ret i16 [[VGET_LANE]] uint16_t test_vget_lane_u16(uint16x4_t a) { return vget_lane_u16(a, 3); } // CHECK-LABEL: @test_vget_lane_u32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1 // CHECK: ret i32 [[VGET_LANE]] uint32_t test_vget_lane_u32(uint32x2_t a) { return vget_lane_u32(a, 1); @@ -3344,18 +3340,14 @@ } // CHECK-LABEL: @test_vget_lane_s16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3 // CHECK: ret i16 [[VGET_LANE]] int16_t test_vget_lane_s16(int16x4_t a) { return vget_lane_s16(a, 3); } // CHECK-LABEL: @test_vget_lane_s32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i32> %a, i32 1 // CHECK: ret i32 [[VGET_LANE]] int32_t test_vget_lane_s32(int32x2_t a) { return vget_lane_s32(a, 1); @@ -3369,18 +3361,14 @@ } // CHECK-LABEL: @test_vget_lane_p16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> %a, i32 3 // CHECK: ret i16 [[VGET_LANE]] poly16_t test_vget_lane_p16(poly16x4_t a) { return vget_lane_p16(a, 3); } // CHECK-LABEL: @test_vget_lane_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x float> %a, i32 1 // CHECK: ret float [[VGET_LANE]] float32_t test_vget_lane_f32(float32x2_t a) { return vget_lane_f32(a, 1); @@ -3392,9 +3380,7 @@ // CHECK: store <4 x half> %a, <4 x half>* [[__REINT_242]], align 8 // CHECK: [[TMP0:%.*]] = bitcast <4 x half>* [[__REINT_242]] to <4 x i16>* // CHECK: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 8 -// CHECK: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to <8 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP3]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i16> [[TMP1]], i32 1 // CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_242]], align 2 // CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_242]] to half* // CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2 @@ -3412,18 +3398,14 @@ } // CHECK-LABEL: @test_vgetq_lane_u16( -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7 // CHECK: ret i16 [[VGET_LANE]] uint16_t test_vgetq_lane_u16(uint16x8_t a) { return vgetq_lane_u16(a, 7); } // CHECK-LABEL: @test_vgetq_lane_u32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3 // CHECK: ret i32 [[VGET_LANE]] uint32_t test_vgetq_lane_u32(uint32x4_t a) { return vgetq_lane_u32(a, 3); @@ -3437,18 +3419,14 @@ } // CHECK-LABEL: @test_vgetq_lane_s16( -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7 // CHECK: ret i16 [[VGET_LANE]] int16_t test_vgetq_lane_s16(int16x8_t a) { return vgetq_lane_s16(a, 7); } // CHECK-LABEL: @test_vgetq_lane_s32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x i32> %a, i32 3 // CHECK: ret i32 [[VGET_LANE]] int32_t test_vgetq_lane_s32(int32x4_t a) { return vgetq_lane_s32(a, 3); @@ -3462,18 +3440,14 @@ } // CHECK-LABEL: @test_vgetq_lane_p16( -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 7 +// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> %a, i32 7 // CHECK: ret i16 [[VGET_LANE]] poly16_t test_vgetq_lane_p16(poly16x8_t a) { return vgetq_lane_p16(a, 7); } // CHECK-LABEL: @test_vgetq_lane_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> [[TMP1]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <4 x float> %a, i32 3 // CHECK: ret float [[VGET_LANE]] float32_t test_vgetq_lane_f32(float32x4_t a) { return vgetq_lane_f32(a, 3); @@ -3485,9 +3459,7 @@ // CHECK: store <8 x half> %a, <8 x half>* [[__REINT_244]], align 16 // CHECK: [[TMP0:%.*]] = bitcast <8 x half>* [[__REINT_244]] to <8 x i16>* // CHECK: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 16 -// CHECK: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <16 x i8> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP3]], i32 3 +// CHECK: [[VGET_LANE:%.*]] = extractelement <8 x i16> [[TMP1]], i32 3 // CHECK: store i16 [[VGET_LANE]], i16* [[__REINT1_244]], align 2 // CHECK: [[TMP4:%.*]] = bitcast i16* [[__REINT1_244]] to half* // CHECK: [[TMP5:%.*]] = load half, half* [[TMP4]], align 2 @@ -3498,36 +3470,28 @@ } // CHECK-LABEL: @test_vget_lane_s64( -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0 // CHECK: ret i64 [[VGET_LANE]] int64_t test_vget_lane_s64(int64x1_t a) { return vget_lane_s64(a, 0); } // CHECK-LABEL: @test_vget_lane_u64( -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> [[TMP1]], i32 0 +// CHECK: [[VGET_LANE:%.*]] = extractelement <1 x i64> %a, i32 0 // CHECK: ret i64 [[VGET_LANE]] uint64_t test_vget_lane_u64(uint64x1_t a) { return vget_lane_u64(a, 0); } // CHECK-LABEL: @test_vgetq_lane_s64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1 // CHECK: ret i64 [[VGET_LANE]] int64_t test_vgetq_lane_s64(int64x2_t a) { return vgetq_lane_s64(a, 1); } // CHECK-LABEL: @test_vgetq_lane_u64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> [[TMP1]], i32 1 +// CHECK: [[VGET_LANE:%.*]] = extractelement <2 x i64> %a, i32 1 // CHECK: ret i64 [[VGET_LANE]] uint64_t test_vgetq_lane_u64(uint64x2_t a) { return vgetq_lane_u64(a, 1); @@ -14061,18 +14025,14 @@ } // CHECK-LABEL: @test_vset_lane_u16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3 // CHECK: ret <4 x i16> [[VSET_LANE]] uint16x4_t test_vset_lane_u16(uint16_t a, uint16x4_t b) { return vset_lane_u16(a, b, 3); } // CHECK-LABEL: @test_vset_lane_u32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1 // CHECK: ret <2 x i32> [[VSET_LANE]] uint32x2_t test_vset_lane_u32(uint32_t a, uint32x2_t b) { return vset_lane_u32(a, b, 1); @@ -14086,18 +14046,14 @@ } // CHECK-LABEL: @test_vset_lane_s16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3 // CHECK: ret <4 x i16> [[VSET_LANE]] int16x4_t test_vset_lane_s16(int16_t a, int16x4_t b) { return vset_lane_s16(a, b, 3); } // CHECK-LABEL: @test_vset_lane_s32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i32> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> [[TMP1]], i32 %a, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i32> %b, i32 %a, i32 1 // CHECK: ret <2 x i32> [[VSET_LANE]] int32x2_t test_vset_lane_s32(int32_t a, int32x2_t b) { return vset_lane_s32(a, b, 1); @@ -14111,18 +14067,14 @@ } // CHECK-LABEL: @test_vset_lane_p16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP1]], i16 %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> %b, i16 %a, i32 3 // CHECK: ret <4 x i16> [[VSET_LANE]] poly16x4_t test_vset_lane_p16(poly16_t a, poly16x4_t b) { return vset_lane_p16(a, b, 3); } // CHECK-LABEL: @test_vset_lane_f32( -// CHECK: [[TMP0:%.*]] = bitcast <2 x float> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x float> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> [[TMP1]], float %a, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x float> %b, float %a, i32 1 // CHECK: ret <2 x float> [[VSET_LANE]] float32x2_t test_vset_lane_f32(float32_t a, float32x2_t b) { return vset_lane_f32(a, b, 1); @@ -14139,9 +14091,7 @@ // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 // CHECK: [[TMP3:%.*]] = bitcast <4 x half>* [[__REINT1_246]] to <4 x i16>* // CHECK: [[TMP4:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 8 -// CHECK: [[TMP5:%.*]] = bitcast <4 x i16> [[TMP4]] to <8 x i8> -// CHECK: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP5]] to <4 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP6]], i16 [[TMP2]], i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i16> [[TMP4]], i16 [[TMP2]], i32 1 // CHECK: store <4 x i16> [[VSET_LANE]], <4 x i16>* [[__REINT2_246]], align 8 // CHECK: [[TMP7:%.*]] = bitcast <4 x i16>* [[__REINT2_246]] to <4 x half>* // CHECK: [[TMP8:%.*]] = load <4 x half>, <4 x half>* [[TMP7]], align 8 @@ -14158,18 +14108,14 @@ } // CHECK-LABEL: @test_vsetq_lane_u16( -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7 // CHECK: ret <8 x i16> [[VSET_LANE]] uint16x8_t test_vsetq_lane_u16(uint16_t a, uint16x8_t b) { return vsetq_lane_u16(a, b, 7); } // CHECK-LABEL: @test_vsetq_lane_u32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3 // CHECK: ret <4 x i32> [[VSET_LANE]] uint32x4_t test_vsetq_lane_u32(uint32_t a, uint32x4_t b) { return vsetq_lane_u32(a, b, 3); @@ -14183,18 +14129,14 @@ } // CHECK-LABEL: @test_vsetq_lane_s16( -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7 // CHECK: ret <8 x i16> [[VSET_LANE]] int16x8_t test_vsetq_lane_s16(int16_t a, int16x8_t b) { return vsetq_lane_s16(a, b, 7); } // CHECK-LABEL: @test_vsetq_lane_s32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x i32> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> [[TMP1]], i32 %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x i32> %b, i32 %a, i32 3 // CHECK: ret <4 x i32> [[VSET_LANE]] int32x4_t test_vsetq_lane_s32(int32_t a, int32x4_t b) { return vsetq_lane_s32(a, b, 3); @@ -14208,18 +14150,14 @@ } // CHECK-LABEL: @test_vsetq_lane_p16( -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP1]], i16 %a, i32 7 +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> %b, i16 %a, i32 7 // CHECK: ret <8 x i16> [[VSET_LANE]] poly16x8_t test_vsetq_lane_p16(poly16_t a, poly16x8_t b) { return vsetq_lane_p16(a, b, 7); } // CHECK-LABEL: @test_vsetq_lane_f32( -// CHECK: [[TMP0:%.*]] = bitcast <4 x float> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x float> -// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> [[TMP1]], float %a, i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <4 x float> %b, float %a, i32 3 // CHECK: ret <4 x float> [[VSET_LANE]] float32x4_t test_vsetq_lane_f32(float32_t a, float32x4_t b) { return vsetq_lane_f32(a, b, 3); @@ -14236,9 +14174,7 @@ // CHECK: [[TMP2:%.*]] = load i16, i16* [[TMP1]], align 2 // CHECK: [[TMP3:%.*]] = bitcast <8 x half>* [[__REINT1_248]] to <8 x i16>* // CHECK: [[TMP4:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 16 -// CHECK: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP4]] to <16 x i8> -// CHECK: [[TMP6:%.*]] = bitcast <16 x i8> [[TMP5]] to <8 x i16> -// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP6]], i16 [[TMP2]], i32 3 +// CHECK: [[VSET_LANE:%.*]] = insertelement <8 x i16> [[TMP4]], i16 [[TMP2]], i32 3 // CHECK: store <8 x i16> [[VSET_LANE]], <8 x i16>* [[__REINT2_248]], align 16 // CHECK: [[TMP7:%.*]] = bitcast <8 x i16>* [[__REINT2_248]] to <8 x half>* // CHECK: [[TMP8:%.*]] = load <8 x half>, <8 x half>* [[TMP7]], align 16 @@ -14248,36 +14184,28 @@ } // CHECK-LABEL: @test_vset_lane_s64( -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0 // CHECK: ret <1 x i64> [[VSET_LANE]] int64x1_t test_vset_lane_s64(int64_t a, int64x1_t b) { return vset_lane_s64(a, b, 0); } // CHECK-LABEL: @test_vset_lane_u64( -// CHECK: [[TMP0:%.*]] = bitcast <1 x i64> %b to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> [[TMP1]], i64 %a, i32 0 +// CHECK: [[VSET_LANE:%.*]] = insertelement <1 x i64> %b, i64 %a, i32 0 // CHECK: ret <1 x i64> [[VSET_LANE]] uint64x1_t test_vset_lane_u64(uint64_t a, uint64x1_t b) { return vset_lane_u64(a, b, 0); } // CHECK-LABEL: @test_vsetq_lane_s64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1 // CHECK: ret <2 x i64> [[VSET_LANE]] int64x2_t test_vsetq_lane_s64(int64_t a, int64x2_t b) { return vsetq_lane_s64(a, b, 1); } // CHECK-LABEL: @test_vsetq_lane_u64( -// CHECK: [[TMP0:%.*]] = bitcast <2 x i64> %b to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <2 x i64> -// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> [[TMP1]], i64 %a, i32 1 +// CHECK: [[VSET_LANE:%.*]] = insertelement <2 x i64> %b, i64 %a, i32 1 // CHECK: ret <2 x i64> [[VSET_LANE]] uint64x2_t test_vsetq_lane_u64(uint64_t a, uint64x2_t b) { return vsetq_lane_u64(a, b, 1); diff --git a/clang/test/Headers/arm-neon-header.c b/clang/test/Headers/arm-neon-header.c --- a/clang/test/Headers/arm-neon-header.c +++ b/clang/test/Headers/arm-neon-header.c @@ -20,4 +20,7 @@ // RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++ --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++14 -xc++ %s // RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding -nostdinc++ --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c++17 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc -flax-vector-conversions=none %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=aarch64_be-none-eabi -march=armv8.2-a+fp16 -std=c11 -xc -flax-vector-conversions=none %s + #include diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -636,7 +636,7 @@ default: llvm_unreachable("Unhandled case!"); } - if (isChar() && !Pointer) + if (isChar() && !Pointer && Signed) // Make chars explicitly signed. S = "S" + S; else if (isInteger() && !Pointer && !Signed) @@ -1442,7 +1442,7 @@ } // Check if an explicit cast is needed. - if (CastToType.isVector()) { + if (CastToType.isVector() && LocalCK == ClassB) { CastToType.makeInteger(8, true); Arg = "(" + CastToType.str() + ")" + Arg; }