diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -5251,6 +5251,8 @@ NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_s64_f64, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvtd_u64_f64, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvth_bf16_f32, aarch64_neon_bfcvt, 0), NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), @@ -5268,6 +5270,8 @@ NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvts_s32_f32, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvts_u32_f32, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0), NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType), @@ -5426,6 +5430,10 @@ NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_s32_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_s64_f16, aarch64_neon_fcvtzs, AddRetType | Add1ArgType), + NEONMAP1(vcvth_u32_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), + NEONMAP1(vcvth_u64_f16, aarch64_neon_fcvtzu, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType), NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType), @@ -8995,21 +9003,6 @@ Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy); return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr); } - case NEON::BI__builtin_neon_vcvts_u32_f32: - case NEON::BI__builtin_neon_vcvtd_u64_f64: - usgn = true; - LLVM_FALLTHROUGH; - case NEON::BI__builtin_neon_vcvts_s32_f32: - case NEON::BI__builtin_neon_vcvtd_s64_f64: { - Ops.push_back(EmitScalarExpr(E->getArg(0))); - bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64; - llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty; - llvm::Type *FTy = Is64 ? DoubleTy : FloatTy; - Ops[0] = Builder.CreateBitCast(Ops[0], FTy); - if (usgn) - return Builder.CreateFPToUI(Ops[0], InTy); - return Builder.CreateFPToSI(Ops[0], InTy); - } case NEON::BI__builtin_neon_vcvts_f32_u32: case NEON::BI__builtin_neon_vcvtd_f64_u64: usgn = true; @@ -9047,44 +9040,16 @@ return Builder.CreateUIToFP(Ops[0], FTy); return Builder.CreateSIToFP(Ops[0], FTy); } - case NEON::BI__builtin_neon_vcvth_u16_f16: - usgn = true; - LLVM_FALLTHROUGH; - case NEON::BI__builtin_neon_vcvth_s16_f16: { - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); - if (usgn) - return Builder.CreateFPToUI(Ops[0], Int16Ty); - return Builder.CreateFPToSI(Ops[0], Int16Ty); - } - case NEON::BI__builtin_neon_vcvth_u32_f16: - usgn = true; - LLVM_FALLTHROUGH; - case NEON::BI__builtin_neon_vcvth_s32_f16: { - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); - if (usgn) - return Builder.CreateFPToUI(Ops[0], Int32Ty); - return Builder.CreateFPToSI(Ops[0], Int32Ty); - } - case NEON::BI__builtin_neon_vcvth_u64_f16: - usgn = true; - LLVM_FALLTHROUGH; - case NEON::BI__builtin_neon_vcvth_s64_f16: { - Ops.push_back(EmitScalarExpr(E->getArg(0))); - Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy); - if (usgn) - return Builder.CreateFPToUI(Ops[0], Int64Ty); - return Builder.CreateFPToSI(Ops[0], Int64Ty); - } case NEON::BI__builtin_neon_vcvtah_u16_f16: case NEON::BI__builtin_neon_vcvtmh_u16_f16: case NEON::BI__builtin_neon_vcvtnh_u16_f16: case NEON::BI__builtin_neon_vcvtph_u16_f16: + case NEON::BI__builtin_neon_vcvth_u16_f16: case NEON::BI__builtin_neon_vcvtah_s16_f16: case NEON::BI__builtin_neon_vcvtmh_s16_f16: case NEON::BI__builtin_neon_vcvtnh_s16_f16: - case NEON::BI__builtin_neon_vcvtph_s16_f16: { + case NEON::BI__builtin_neon_vcvtph_s16_f16: + case NEON::BI__builtin_neon_vcvth_s16_f16: { unsigned Int; llvm::Type* InTy = Int32Ty; llvm::Type* FTy = HalfTy; @@ -9100,6 +9065,8 @@ Int = Intrinsic::aarch64_neon_fcvtnu; break; case NEON::BI__builtin_neon_vcvtph_u16_f16: Int = Intrinsic::aarch64_neon_fcvtpu; break; + case NEON::BI__builtin_neon_vcvth_u16_f16: + Int = Intrinsic::aarch64_neon_fcvtzu; break; case NEON::BI__builtin_neon_vcvtah_s16_f16: Int = Intrinsic::aarch64_neon_fcvtas; break; case NEON::BI__builtin_neon_vcvtmh_s16_f16: @@ -9108,6 +9075,8 @@ Int = Intrinsic::aarch64_neon_fcvtns; break; case NEON::BI__builtin_neon_vcvtph_s16_f16: Int = Intrinsic::aarch64_neon_fcvtps; break; + case NEON::BI__builtin_neon_vcvth_s16_f16: + Int = Intrinsic::aarch64_neon_fcvtzs; break; } Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt"); return Builder.CreateTrunc(Ops[0], Int16Ty); @@ -10148,10 +10117,10 @@ case NEON::BI__builtin_neon_vcvtq_u64_v: case NEON::BI__builtin_neon_vcvtq_s16_v: case NEON::BI__builtin_neon_vcvtq_u16_v: { - Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type)); - if (usgn) - return Builder.CreateFPToUI(Ops[0], Ty); - return Builder.CreateFPToSI(Ops[0], Ty); + Int = + usgn ? Intrinsic::aarch64_neon_fcvtzu : Intrinsic::aarch64_neon_fcvtzs; + llvm::Type *Tys[2] = {Ty, GetFloatNeonType(this, Type)}; + return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtz"); } case NEON::BI__builtin_neon_vcvta_s16_v: case NEON::BI__builtin_neon_vcvta_u16_v: diff --git a/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c b/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c --- a/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-fcvt-intrinsics.c @@ -125,28 +125,28 @@ } // CHECK-LABEL: define i32 @test_vcvts_s32_f32(float %a) #0 { -// CHECK: [[TMP0:%.*]] = fptosi float %a to i32 +// CHECK: [[TMP0:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f32(float %a) // CHECK: ret i32 [[TMP0]] int32_t test_vcvts_s32_f32(float32_t a) { return (int32_t)vcvts_s32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtd_s64_f64(double %a) #0 { -// CHECK: [[TMP0:%.*]] = fptosi double %a to i64 +// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f64(double %a) // CHECK: ret i64 [[TMP0]] int64_t test_vcvtd_s64_f64(float64_t a) { return (int64_t)vcvtd_s64_f64(a); } // CHECK-LABEL: define i32 @test_vcvts_u32_f32(float %a) #0 { -// CHECK: [[TMP0:%.*]] = fptoui float %a to i32 +// CHECK: [[TMP0:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f32(float %a) // CHECK: ret i32 [[TMP0]] uint32_t test_vcvts_u32_f32(float32_t a) { return (uint32_t)vcvts_u32_f32(a); } // CHECK-LABEL: define i64 @test_vcvtd_u64_f64(double %a) #0 { -// CHECK: [[TMP0:%.*]] = fptoui double %a to i64 +// CHECK: [[TMP0:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f64(double %a) // CHECK: ret i64 [[TMP0]] uint64_t test_vcvtd_u64_f64(float64_t a) { return (uint64_t)vcvtd_u64_f64(a); diff --git a/clang/test/CodeGen/aarch64-neon-intrinsics.c b/clang/test/CodeGen/aarch64-neon-intrinsics.c --- a/clang/test/CodeGen/aarch64-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-neon-intrinsics.c @@ -18023,7 +18023,7 @@ // CHECK-LABEL: @test_vcvt_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = fptosi <1 x double> %a to <1 x i64> +// CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzs.v1i64.v1f64(<1 x double> %a) // CHECK: ret <1 x i64> [[TMP1]] int64x1_t test_vcvt_s64_f64(float64x1_t a) { return vcvt_s64_f64(a); @@ -18031,7 +18031,7 @@ // CHECK-LABEL: @test_vcvt_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <1 x double> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = fptoui <1 x double> %a to <1 x i64> +// CHECK: [[TMP1:%.*]] = call <1 x i64> @llvm.aarch64.neon.fcvtzu.v1i64.v1f64(<1 x double> %a) // CHECK: ret <1 x i64> [[TMP1]] uint64x1_t test_vcvt_u64_f64(float64x1_t a) { return vcvt_u64_f64(a); diff --git a/clang/test/CodeGen/aarch64-neon-misc.c b/clang/test/CodeGen/aarch64-neon-misc.c --- a/clang/test/CodeGen/aarch64-neon-misc.c +++ b/clang/test/CodeGen/aarch64-neon-misc.c @@ -2311,7 +2311,7 @@ // CHECK-LABEL: @test_vcvt_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = fptosi <2 x float> %a to <2 x i32> +// CHECK: [[TMP1:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtzs.v2i32.v2f32(<2 x float> %a) // CHECK: ret <2 x i32> [[TMP1]] int32x2_t test_vcvt_s32_f32(float32x2_t a) { return vcvt_s32_f32(a); @@ -2319,7 +2319,7 @@ // CHECK-LABEL: @test_vcvtq_s32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = fptosi <4 x float> %a to <4 x i32> +// CHECK: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtzs.v4i32.v4f32(<4 x float> %a) // CHECK: ret <4 x i32> [[TMP1]] int32x4_t test_vcvtq_s32_f32(float32x4_t a) { return vcvtq_s32_f32(a); @@ -2327,7 +2327,7 @@ // CHECK-LABEL: @test_vcvtq_s64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = fptosi <2 x double> %a to <2 x i64> +// CHECK: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtzs.v2i64.v2f64(<2 x double> %a) // CHECK: ret <2 x i64> [[TMP1]] int64x2_t test_vcvtq_s64_f64(float64x2_t a) { return vcvtq_s64_f64(a); @@ -2335,7 +2335,7 @@ // CHECK-LABEL: @test_vcvt_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <2 x float> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = fptoui <2 x float> %a to <2 x i32> +// CHECK: [[TMP1:%.*]] = call <2 x i32> @llvm.aarch64.neon.fcvtzu.v2i32.v2f32(<2 x float> %a) // CHECK: ret <2 x i32> [[TMP1]] uint32x2_t test_vcvt_u32_f32(float32x2_t a) { return vcvt_u32_f32(a); @@ -2343,7 +2343,7 @@ // CHECK-LABEL: @test_vcvtq_u32_f32( // CHECK: [[TMP0:%.*]] = bitcast <4 x float> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = fptoui <4 x float> %a to <4 x i32> +// CHECK: [[TMP1:%.*]] = call <4 x i32> @llvm.aarch64.neon.fcvtzu.v4i32.v4f32(<4 x float> %a) // CHECK: ret <4 x i32> [[TMP1]] uint32x4_t test_vcvtq_u32_f32(float32x4_t a) { return vcvtq_u32_f32(a); @@ -2351,7 +2351,7 @@ // CHECK-LABEL: @test_vcvtq_u64_f64( // CHECK: [[TMP0:%.*]] = bitcast <2 x double> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = fptoui <2 x double> %a to <2 x i64> +// CHECK: [[TMP1:%.*]] = call <2 x i64> @llvm.aarch64.neon.fcvtzu.v2i64.v2f64(<2 x double> %a) // CHECK: ret <2 x i64> [[TMP1]] uint64x2_t test_vcvtq_u64_f64(float64x2_t a) { return vcvtq_u64_f64(a); diff --git a/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c --- a/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c +++ b/clang/test/CodeGen/aarch64-v8.2a-fp16-intrinsics.c @@ -97,42 +97,44 @@ } // CHECK-LABEL: test_vcvth_s16_f16 -// CHECK: [[VCVT:%.*]] = fptosi half %a to i16 -// CHECK: ret i16 [[VCVT]] +// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a) +// CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16 +// CHECK: ret i16 [[TRUNC]] int16_t test_vcvth_s16_f16 (float16_t a) { return vcvth_s16_f16(a); } // CHECK-LABEL: test_vcvth_s32_f16 -// CHECK: [[VCVT:%.*]] = fptosi half %a to i32 +// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzs.i32.f16(half %a) // CHECK: ret i32 [[VCVT]] int32_t test_vcvth_s32_f16 (float16_t a) { return vcvth_s32_f16(a); } // CHECK-LABEL: test_vcvth_s64_f16 -// CHECK: [[VCVT:%.*]] = fptosi half %a to i64 +// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzs.i64.f16(half %a) // CHECK: ret i64 [[VCVT]] int64_t test_vcvth_s64_f16 (float16_t a) { return vcvth_s64_f16(a); } // CHECK-LABEL: test_vcvth_u16_f16 -// CHECK: [[VCVT:%.*]] = fptoui half %a to i16 -// CHECK: ret i16 [[VCVT]] +// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a) +// CHECK: [[TRUNC:%.*]] = trunc i32 [[VCVT]] to i16 +// CHECK: ret i16 [[TRUNC]] uint16_t test_vcvth_u16_f16 (float16_t a) { return vcvth_u16_f16(a); } // CHECK-LABEL: test_vcvth_u32_f16 -// CHECK: [[VCVT:%.*]] = fptoui half %a to i32 +// CHECK: [[VCVT:%.*]] = call i32 @llvm.aarch64.neon.fcvtzu.i32.f16(half %a) // CHECK: ret i32 [[VCVT]] uint32_t test_vcvth_u32_f16 (float16_t a) { return vcvth_u32_f16(a); } // CHECK-LABEL: test_vcvth_u64_f16 -// CHECK: [[VCVT:%.*]] = fptoui half %a to i64 +// CHECK: [[VCVT:%.*]] = call i64 @llvm.aarch64.neon.fcvtzu.i64.f16(half %a) // CHECK: ret i64 [[VCVT]] uint64_t test_vcvth_u64_f16 (float16_t a) { return vcvth_u64_f16(a); diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c --- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c @@ -130,28 +130,28 @@ } // CHECK-LABEL: test_vcvt_s16_f16 -// CHECK: [[VCVT:%.*]] = fptosi <4 x half> %a to <4 x i16> +// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtzs.v4i16.v4f16(<4 x half> %a) // CHECK: ret <4 x i16> [[VCVT]] int16x4_t test_vcvt_s16_f16 (float16x4_t a) { return vcvt_s16_f16(a); } // CHECK-LABEL: test_vcvtq_s16_f16 -// CHECK: [[VCVT:%.*]] = fptosi <8 x half> %a to <8 x i16> +// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtzs.v8i16.v8f16(<8 x half> %a) // CHECK: ret <8 x i16> [[VCVT]] int16x8_t test_vcvtq_s16_f16 (float16x8_t a) { return vcvtq_s16_f16(a); } // CHECK-LABEL: test_vcvt_u16_f16 -// CHECK: [[VCVT:%.*]] = fptoui <4 x half> %a to <4 x i16> +// CHECK: [[VCVT:%.*]] = call <4 x i16> @llvm.aarch64.neon.fcvtzu.v4i16.v4f16(<4 x half> %a) // CHECK: ret <4 x i16> [[VCVT]] uint16x4_t test_vcvt_u16_f16 (float16x4_t a) { return vcvt_u16_f16(a); } // CHECK-LABEL: test_vcvtq_u16_f16 -// CHECK: [[VCVT:%.*]] = fptoui <8 x half> %a to <8 x i16> +// CHECK: [[VCVT:%.*]] = call <8 x i16> @llvm.aarch64.neon.fcvtzu.v8i16.v8f16(<8 x half> %a) // CHECK: ret <8 x i16> [[VCVT]] uint16x8_t test_vcvtq_u16_f16 (float16x8_t a) { return vcvtq_u16_f16(a);