diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -237,11 +237,18 @@ let params = T.Int in { def vmvnq: Intrinsic; + defm vmvnq: IntrinsicMX $a, $pred, $inactive)>; def vclzq: Intrinsic $a, (i1 0))>; + defm vclzq: IntrinsicMX $a, $pred, $inactive)>; } let params = T.Signed in { def vclsq: Intrinsic $a)>; + defm vclsq: IntrinsicMX $a, $pred, $inactive)>; + def vnegq: Intrinsic; def vabsq: Intrinsic; + + foreach name = ["qneg", "qabs"] in { + defm v#name#q: IntrinsicMX $a, $pred, $inactive), + 0 /* no _x variant for saturating intrinsics */>; + } +} +let params = !listconcat(T.Signed, T.Float) in { + foreach name = ["neg", "abs"] in { + defm v#name#q: IntrinsicMX $a, $pred, $inactive)>; + } } let params = T.Float in { def vnegq_f: Intrinsic, @@ -440,49 +459,77 @@ defvar FVector = VecOf; defvar IVector = VecOf; - let params = [IScalar], pnt = PNT_2Type in - def : Intrinsic, - NameOverride<"vcvtq_" # FScalar>; - let params = [FScalar], pnt = PNT_None in - def : Intrinsic, - NameOverride<"vcvtq_" # IScalar>; + let params = [IScalar] in { + let pnt = PNT_2Type in { + def : Intrinsic, + NameOverride<"vcvtq_" # FScalar>; + } + defm vcvtq: IntrinsicMX + $a, (unsignedflag IScalar), $pred, $inactive), + 1, "_" # FScalar, PNT_2Type, PNT_2Type>; + } + let params = [FScalar] in { + let pnt = PNT_None in { + def : Intrinsic, + NameOverride<"vcvtq_" # IScalar>; + } + defm vcvtq: IntrinsicMX + $a, (unsignedflag IScalar), $pred, $inactive), + 1, "_" # IScalar, PNT_2Type, PNT_None>; + } } -defm : float_int_conversions; -defm : float_int_conversions; -defm : float_int_conversions; -defm : float_int_conversions; +defm "" : float_int_conversions; +defm "" : float_int_conversions; +defm "" : float_int_conversions; +defm "" : float_int_conversions; -let params = [s8, u8, s16, u16] in { - def vmovlbq: Intrinsic; - def vmovltq: Intrinsic; +multiclass vmovl { + let params = [s8, u8, s16, u16] in { + def "": Intrinsic; + defm "": IntrinsicMX + $a, (unsignedflag Scalar), top, $pred, $inactive)>; + } } -let params = [s16, u16, s32, u32] in { - def vmovnbq: Intrinsic; - def vmovntq: Intrinsic; +defm vmovlbq: vmovl<0>; +defm vmovltq: vmovl<1>; + +multiclass vmovn { + let params = [s16, u16, s32, u32] in { + def "": Intrinsic; + def _m: Intrinsic + $inactive, $a, top, $pred)>; + } } -let params = T.Float in { - def vrndq: Intrinsic $a)>; - def vrndmq: Intrinsic $a)>; - def vrndpq: Intrinsic $a)>; - def vrndaq: Intrinsic $a)>; - def vrndxq: Intrinsic $a)>; - def vrndnq: Intrinsic $a)>; +defm vmovntq: vmovn<1, (zip (vreinterpret $inactive, Vector), $a)>; +defm vmovnbq: vmovn<0, + (zip $a, (vreinterpret (vrev $inactive, (bitsize Scalar)), Vector))>; + +multiclass vrnd { + let params = T.Float in { + def "": Intrinsic; + defm "": IntrinsicMX + $a, $pred, $inactive)>; + } } +defm vrndq: vrnd, "z">; +defm vrndmq: vrnd, "m">; +defm vrndpq: vrnd, "p">; +defm vrndaq: vrnd, "a">; +defm vrndxq: vrnd, "x">; +defm vrndnq: vrnd, "n">; + multiclass compare_with_pred { // Make the predicated and unpredicated versions of a single comparison. 
@@ -1231,12 +1278,24 @@ defm vrmlsldavh : MVEBinaryVectorHoriz64R; } -let params = T.All8 in -def vrev16q : Intrinsic; -let params = !listconcat(T.All8, T.All16) in -def vrev32q : Intrinsic; -let params = T.Usual in -def vrev64q : Intrinsic; +multiclass vrev_predicated { + defm "" : IntrinsicMX + $a, revsize, $pred, $inactive)>; +} + +let params = T.All8 in { + def vrev16q : Intrinsic; + defm vrev16q: vrev_predicated<16>; +} +let params = !listconcat(T.All8, T.All16) in { + def vrev32q : Intrinsic; + defm vrev32q: vrev_predicated<32>; +} +let params = T.Usual in { + def vrev64q : Intrinsic; + defm vrev64q: vrev_predicated<64>; +} foreach desttype = T.All in { // We want a vreinterpretq between every pair of supported vector types diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -506,8 +506,8 @@ // provides the input value of the output register, i.e. all the // inactive lanes in the predicated operation take their values from // this. - def "_m" # nameSuffix: - Intrinsic { + def : Intrinsic, + NameOverride { let pnt = pnt_m; } @@ -515,8 +515,8 @@ // The _x variant leaves off that parameter, and simply uses an // undef value of the same type. - def "_x" # nameSuffix: - Intrinsic { + def : Intrinsic, + NameOverride { let pnt = pnt_x; } } diff --git a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c --- a/clang/test/CodeGen/arm-mve-intrinsics/absneg.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/absneg.c @@ -164,6 +164,198 @@ #endif /* POLYMORPHIC */ } +// CHECK-LABEL: @test_vmvnq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vmvnq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmvnq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmvnq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + 
+// CHECK-LABEL: @test_vmvnq_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vmvnq_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmvnq_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_m_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmvnq_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmvnq_m_u32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vmvnq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* POLYMORPHIC */ + return vmvnq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmvnq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* POLYMORPHIC */ + return vmvnq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmvnq_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* 
POLYMORPHIC */ + return vmvnq_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vmvnq_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* POLYMORPHIC */ + return vmvnq_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmvnq_x_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* POLYMORPHIC */ + return vmvnq_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmvnq_x_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmvnq_x_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmvnq_x(a, p); +#else /* POLYMORPHIC */ + return vmvnq_x_u32(a, p); +#endif /* POLYMORPHIC */ +} + // CHECK-LABEL: @test_vnegq_f16( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = fneg <8 x half> [[A:%.*]] @@ -335,4 +527,427 @@ return vqnegq_s32(a); #endif /* POLYMORPHIC */ } +#include + +// CHECK-LABEL: @test_vnegq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vnegq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vnegq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vnegq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vnegq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> 
@llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vnegq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vnegq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vnegq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vnegq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vnegq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vnegq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vnegq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_x(a, p); +#else /* POLYMORPHIC */ + return vnegq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vnegq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_x(a, p); +#else /* POLYMORPHIC */ + return vnegq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vnegq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_x(a, p); +#else /* POLYMORPHIC */ + return vnegq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vnegq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_x(a, p); +#else /* POLYMORPHIC */ + return vnegq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vnegq_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vnegq_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vnegq_x(a, p); +#else /* POLYMORPHIC */ + return vnegq_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +#include + +// CHECK-LABEL: @test_vabsq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vabsq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vabsq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vabsq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vabsq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vabsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vabsq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vabsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vabsq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// 
CHECK-LABEL: @test_vabsq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vabsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vabsq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vabsq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_x(a, p); +#else /* POLYMORPHIC */ + return vabsq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vabsq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_x(a, p); +#else /* POLYMORPHIC */ + return vabsq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vabsq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_x(a, p); +#else /* POLYMORPHIC */ + return vabsq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vabsq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_x(a, p); +#else /* POLYMORPHIC */ + return vabsq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vabsq_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vabsq_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vabsq_x(a, p); +#else /* POLYMORPHIC */ + return vabsq_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +#include + +// 
CHECK-LABEL: @test_vqnegq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vqnegq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vqnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vqnegq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqnegq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vqnegq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vqnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vqnegq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqnegq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vqnegq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vqnegq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vqnegq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +#include + +// CHECK-LABEL: @test_vqabsq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vqabsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vqabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vqabsq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqabsq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vqabsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vqabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vqabsq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vqabsq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret 
<4 x i32> [[TMP2]] +// +int32x4_t test_vqabsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vqabsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vqabsq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vclz.c b/clang/test/CodeGen/arm-mve-intrinsics/vclz.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vclz.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vclz.c @@ -130,3 +130,290 @@ #endif /* POLYMORPHIC */ } +// CHECK-LABEL: @test_vclsq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vclsq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclsq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclsq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vclsq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclsq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclsq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vclsq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclsq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vclzq_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclzq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vclzq_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, 
p); +#else /* POLYMORPHIC */ + return vclzq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vclzq_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclzq_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vclzq_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclzq_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vclzq_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclzq_m_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_m_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vclzq_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vclzq_m_u32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclsq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vclsq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_x(a, p); +#else /* POLYMORPHIC */ + return vclsq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclsq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// 
CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vclsq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_x(a, p); +#else /* POLYMORPHIC */ + return vclsq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclsq_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vclsq_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclsq_x(a, p); +#else /* POLYMORPHIC */ + return vclsq_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vclzq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vclzq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vclzq_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vclzq_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vclzq_x_u16(uint16x8_t a, 
mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vclzq_x_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vclzq_x_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vclzq_x(a, p); +#else /* POLYMORPHIC */ + return vclzq_x_u32(a, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vcvt.c @@ -100,6 +100,246 @@ return vcvtq_u32_f32(a); } +// CHECK-LABEL: @test_vcvtq_m_f16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vcvtq_m_f16_s16(float16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_f16_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_f16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vcvtq_m_f16_u16(float16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_f16_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_f32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vcvtq_m_f32_s32(float32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_f32_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_f32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vcvtq_m_f32_u32(float32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, 
a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_f32_u32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_s16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vcvtq_m_s16_f16(int16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_s16_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_s32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vcvtq_m_s32_f32(int32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_s32_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_u16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vcvtq_m_u16_f16(uint16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_u16_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_m_u32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vcvtq_m_u32_f32(uint32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vcvtq_m_u32_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_x_f16_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vcvtq_x_f16_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_x(a, p); +#else /* POLYMORPHIC */ + return vcvtq_x_f16_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_x_f16_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> 
@llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vcvtq_x_f16_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_x(a, p); +#else /* POLYMORPHIC */ + return vcvtq_x_f16_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_x_f32_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vcvtq_x_f32_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_x(a, p); +#else /* POLYMORPHIC */ + return vcvtq_x_f32_s32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_x_f32_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vcvtq_x_f32_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vcvtq_x(a, p); +#else /* POLYMORPHIC */ + return vcvtq_x_f32_u32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vcvtq_x_s16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vcvtq_x_s16_f16(float16x8_t a, mve_pred16_t p) +{ + return vcvtq_x_s16_f16(a, p); +} + +// CHECK-LABEL: @test_vcvtq_x_s32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vcvtq_x_s32_f32(float32x4_t a, mve_pred16_t p) +{ + return vcvtq_x_s32_f32(a, p); +} + +// CHECK-LABEL: @test_vcvtq_x_u16_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vcvtq_x_u16_f16(float16x8_t a, mve_pred16_t p) +{ + return vcvtq_x_u16_f16(a, p); +} + +// CHECK-LABEL: @test_vcvtq_x_u32_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 
x float> [[A:%.*]], i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vcvtq_x_u32_f32(float32x4_t a, mve_pred16_t p) +{ + return vcvtq_x_u32_f32(a, p); +} + // CHECK-LABEL: @test_vcvttq_f16_f32( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = call <8 x half> @llvm.arm.mve.vcvt.narrow(<8 x half> [[A:%.*]], <4 x float> [[B:%.*]], i32 1) diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovl.c @@ -124,3 +124,259 @@ #endif /* POLYMORPHIC */ } +// CHECK-LABEL: @test_vmovlbq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovlbq_m_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovlbq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmovlbq_m_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovlbq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmovlbq_m_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovlbq_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmovlbq_m_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovlbq_m_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> 
@llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovltq_m_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovltq_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmovltq_m_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovltq_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmovltq_m_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovltq_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmovltq_m_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vmovltq_m_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovlbq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_x(a, p); +#else /* POLYMORPHIC */ + return vmovlbq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmovlbq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_x(a, p); +#else /* POLYMORPHIC */ + return 
vmovlbq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmovlbq_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_x(a, p); +#else /* POLYMORPHIC */ + return vmovlbq_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovlbq_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmovlbq_x_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovlbq_x(a, p); +#else /* POLYMORPHIC */ + return vmovlbq_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 0, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovltq_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_x(a, p); +#else /* POLYMORPHIC */ + return vmovltq_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 0, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vmovltq_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_x(a, p); +#else /* POLYMORPHIC */ + return vmovltq_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmovltq_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovltq_x(a, p); +#else /* POLYMORPHIC */ + return vmovltq_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vmovltq_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> [[A:%.*]], i32 1, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vmovltq_x_u16(uint16x8_t a, mve_pred16_t 
p) +{ +#ifdef POLYMORPHIC + return vmovltq_x(a, p); +#else /* POLYMORPHIC */ + return vmovltq_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vmovn.c @@ -197,3 +197,187 @@ return vmovntq_u32(a, b); #endif /* POLYMORPHIC */ } + +// LE-LABEL: @test_vmovnbq_m_s16( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]]) +// LE-NEXT: ret <16 x i8> [[TMP2]] +// +// BE-LABEL: @test_vmovnbq_m_s16( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]]) +// BE-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vmovnbq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovnbq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovnbq_m_s16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovnbq_m_s32( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]]) +// LE-NEXT: ret <8 x i16> [[TMP2]] +// +// BE-LABEL: @test_vmovnbq_m_s32( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]]) +// BE-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovnbq_m_s32(int16x8_t a, int32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovnbq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovnbq_m_s32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovnbq_m_u16( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]]) +// LE-NEXT: ret <16 x i8> [[TMP2]] +// +// BE-LABEL: @test_vmovnbq_m_u16( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 0, <8 x i1> [[TMP1]]) +// BE-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vmovnbq_m_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovnbq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovnbq_m_u16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovnbq_m_u32( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] 
= call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]]) +// LE-NEXT: ret <8 x i16> [[TMP2]] +// +// BE-LABEL: @test_vmovnbq_m_u32( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 0, <4 x i1> [[TMP1]]) +// BE-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmovnbq_m_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovnbq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovnbq_m_u32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovntq_m_s16( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]]) +// LE-NEXT: ret <16 x i8> [[TMP2]] +// +// BE-LABEL: @test_vmovntq_m_s16( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]]) +// BE-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vmovntq_m_s16(int8x16_t a, int16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovntq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovntq_m_s16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovntq_m_s32( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]]) +// LE-NEXT: ret <8 x i16> [[TMP2]] +// +// BE-LABEL: @test_vmovntq_m_s32( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]]) +// BE-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vmovntq_m_s32(int16x8_t a, int32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovntq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovntq_m_s32(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovntq_m_u16( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> [[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]]) +// LE-NEXT: ret <16 x i8> [[TMP2]] +// +// BE-LABEL: @test_vmovntq_m_u16( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> 
[[A:%.*]], <8 x i16> [[B:%.*]], i32 1, <8 x i1> [[TMP1]]) +// BE-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vmovntq_m_u16(uint8x16_t a, uint16x8_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovntq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovntq_m_u16(a, b, p); +#endif /* POLYMORPHIC */ +} + +// LE-LABEL: @test_vmovntq_m_u32( +// LE-NEXT: entry: +// LE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// LE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// LE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]]) +// LE-NEXT: ret <8 x i16> [[TMP2]] +// +// BE-LABEL: @test_vmovntq_m_u32( +// BE-NEXT: entry: +// BE-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// BE-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// BE-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> [[A:%.*]], <4 x i32> [[B:%.*]], i32 1, <4 x i1> [[TMP1]]) +// BE-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vmovntq_m_u32(uint16x8_t a, uint32x4_t b, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vmovntq_m(a, b, p); +#else /* POLYMORPHIC */ + return vmovntq_m_u32(a, b, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrev.c b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vrev.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vrev.c @@ -213,3 +213,483 @@ return vrev64q_u32(a); #endif /* POLYMORPHIC */ } + +// CHECK-LABEL: @test_vrev16q_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev16q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev16q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev16q_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev16q_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vrev16q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev16q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev16q_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrev32q_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev32q_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// 
CHECK-LABEL: @test_vrev32q_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev32q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev32q_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vrev32q_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev32q_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vrev32q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev32q_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vrev32q_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev32q_m_u16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrev64q_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> 
[[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrev64q_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev64q_m_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_s8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vrev64q_m_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_s16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vrev64q_m_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_s32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vrev64q_m_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_u8(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vrev64q_m_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_u16(inactive, a, p); 
+#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_m_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vrev64q_m_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrev64q_m_u32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev16q_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev16q_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev16q_x(a, p); +#else /* POLYMORPHIC */ + return vrev16q_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev16q_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 16, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vrev16q_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev16q_x(a, p); +#else /* POLYMORPHIC */ + return vrev16q_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrev32q_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_x(a, p); +#else /* POLYMORPHIC */ + return vrev32q_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev32q_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_x(a, p); +#else /* POLYMORPHIC */ + return vrev32q_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vrev32q_x_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + 
return vrev32q_x(a, p); +#else /* POLYMORPHIC */ + return vrev32q_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 32, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vrev32q_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_x(a, p); +#else /* POLYMORPHIC */ + return vrev32q_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev32q_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 32, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vrev32q_x_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev32q_x(a, p); +#else /* POLYMORPHIC */ + return vrev32q_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrev64q_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrev64q_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vrev64q_x_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_s8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vrev64q_x_s16(int16x8_t a, 
mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_s16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vrev64q_x_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_s32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 64, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vrev64q_x_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_u8(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 64, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vrev64q_x_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_u16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrev64q_x_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 64, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vrev64q_x_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrev64q_x(a, p); +#else /* POLYMORPHIC */ + return vrev64q_x_u32(a, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c --- a/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c +++ b/clang/test/CodeGen/arm-mve-intrinsics/vrnd.c @@ -171,3 +171,388 @@ return vrndnq_f32(a); #endif /* POLYMORPHIC */ } + +// CHECK-LABEL: @test_vrndaq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndaq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndaq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndaq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndaq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext 
i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndaq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndaq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndaq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndmq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndmq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndmq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndmq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndmq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndmq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndnq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndnq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndnq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndnq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndnq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// 
+float16x8_t test_vrndpq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndpq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndpq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndpq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndpq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndpq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndxq_m_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndxq_m_f16(float16x8_t inactive, float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndxq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndxq_m_f16(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndxq_m_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndxq_m_f32(float32x4_t inactive, float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndxq_m(inactive, a, p); +#else /* POLYMORPHIC */ + return vrndxq_m_f32(inactive, a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndaq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] 
= zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndaq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndaq_x(a, p); +#else /* POLYMORPHIC */ + return vrndaq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndaq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndaq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndaq_x(a, p); +#else /* POLYMORPHIC */ + return vrndaq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndmq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndmq_x(a, p); +#else /* POLYMORPHIC */ + return vrndmq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndmq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndmq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndmq_x(a, p); +#else /* POLYMORPHIC */ + return vrndmq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndnq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndnq_x(a, p); +#else /* POLYMORPHIC */ + return vrndnq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndnq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndnq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndnq_x(a, p); +#else /* POLYMORPHIC */ + return vrndnq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: 
[[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndpq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndpq_x(a, p); +#else /* POLYMORPHIC */ + return vrndpq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndpq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndpq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndpq_x(a, p); +#else /* POLYMORPHIC */ + return vrndpq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndq_x(a, p); +#else /* POLYMORPHIC */ + return vrndq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndq_x(a, p); +#else /* POLYMORPHIC */ + return vrndq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndxq_x_f16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> [[A:%.*]], <8 x i1> [[TMP1]], <8 x half> undef) +// CHECK-NEXT: ret <8 x half> [[TMP2]] +// +float16x8_t test_vrndxq_x_f16(float16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndxq_x(a, p); +#else /* POLYMORPHIC */ + return vrndxq_x_f16(a, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vrndxq_x_f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> [[A:%.*]], <4 x i1> [[TMP1]], <4 x float> undef) +// CHECK-NEXT: ret <4 x float> [[TMP2]] +// +float32x4_t test_vrndxq_x_f32(float32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vrndxq_x(a, p); +#else /* POLYMORPHIC */ + return vrndxq_x_f32(a, p); +#endif /* POLYMORPHIC */ +} + diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td 
b/llvm/include/llvm/IR/IntrinsicsARM.td
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -1159,6 +1159,11 @@
    [llvm_anyvector_ty /* input vector */, llvm_i32_ty /* scale */],
    LLVMMatchType<0>, llvm_anyvector_ty>;

+def int_arm_mve_vcvt_fp_int_predicated: Intrinsic<
+   [llvm_anyvector_ty], [llvm_anyvector_ty, llvm_i32_ty /* unsigned */,
+    llvm_anyvector_ty /* predicate */, LLVMMatchType<0> /* inactive */],
+   [IntrNoMem]>;
+
 def int_arm_mve_vrintn: Intrinsic<
   [llvm_anyvector_ty], [LLVMMatchType<0>], [IntrNoMem]>;
 def int_arm_mve_vcls: Intrinsic<
@@ -1178,4 +1183,32 @@
    LLVMMatchType<0>], [IntrNoMem]>;

+class MVESimpleUnaryPredicated: Intrinsic<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_arm_mve_mvn_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_abs_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_neg_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_qabs_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_qneg_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_clz_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_cls_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintz_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintm_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintp_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrinta_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintx_predicated: MVESimpleUnaryPredicated;
+def int_arm_mve_vrintn_predicated: MVESimpleUnaryPredicated;
+
+def int_arm_mve_vrev_predicated: Intrinsic<[llvm_anyvector_ty],
+  [LLVMMatchType<0>, llvm_i32_ty /* size to reverse */,
+   llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
+
+def int_arm_mve_vmovl_predicated: Intrinsic<[llvm_anyvector_ty],
+  [llvm_anyvector_ty, llvm_i32_ty /* unsigned */, llvm_i32_ty /* top half */,
+   llvm_anyvector_ty /* predicate */, LLVMMatchType<0>], [IntrNoMem]>;
+def int_arm_mve_vmovn_predicated: Intrinsic<[llvm_anyvector_ty],
+  [LLVMMatchType<0>, llvm_anyvector_ty, llvm_i32_ty /* top half */,
+   llvm_anyvector_ty /* predicate */], [IntrNoMem]>;
+
 } // end TargetPrefix
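All of these predicated IR intrinsics use one operand convention: the ordinary inputs come first, then the predicate vector, and the unary (MVESimpleUnaryPredicated) and vmovl forms end with an LLVMMatchType<0> operand supplying the inactive lanes of the result; the _x variants show up with undef in that position, as in the tests above. As an illustrative scalar sketch in C (not code from this patch) of what int_arm_mve_mvn_predicated computes on a 16-lane byte vector, where each of the 16 predicate bits controls one byte lane:

#include <stdint.h>

// Lane-by-lane model of the merging (_m) predicated VMVN: active lanes
// take the bitwise complement of a, inactive lanes come from 'inactive'.
static void mvn_m_model(uint8_t dst[16], const uint8_t inactive[16],
                        const uint8_t a[16], uint16_t p) {
  for (int i = 0; i < 16; i++)
    dst[i] = ((p >> i) & 1) ? (uint8_t)~a[i] : inactive[i];
}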
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1320,28 +1320,29 @@
     (v4i32 (MVE_VREV32_8 (v4i32 MQPR:$src)))>;
 }

-let Predicates = [HasMVEInt] in {
-  def : Pat<(v4i32 (ARMvrev64 (v4i32 MQPR:$src))),
-            (v4i32 (MVE_VREV64_32 (v4i32 MQPR:$src)))>;
-  def : Pat<(v8i16 (ARMvrev64 (v8i16 MQPR:$src))),
-            (v8i16 (MVE_VREV64_16 (v8i16 MQPR:$src)))>;
-  def : Pat<(v16i8 (ARMvrev64 (v16i8 MQPR:$src))),
-            (v16i8 (MVE_VREV64_8 (v16i8 MQPR:$src)))>;
+multiclass MVE_VREV_basic_patterns<int revbits, list<MVEVectorVTInfo> VTIs,
+                                   Instruction Inst> {
+  defvar unpred_op = !cast<SDNode>("ARMvrev" # revbits);
+
+  foreach VTI = VTIs in {
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$src)))>;
+    def : Pat<(VTI.Vec (int_arm_mve_vrev_predicated (VTI.Vec MQPR:$src),
+                  revbits, (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$src), ARMVCCThen,
+                  (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+  }
+}

-  def : Pat<(v8i16 (ARMvrev32 (v8i16 MQPR:$src))),
-            (v8i16 (MVE_VREV32_16 (v8i16 MQPR:$src)))>;
-  def : Pat<(v16i8 (ARMvrev32 (v16i8 MQPR:$src))),
-            (v16i8 (MVE_VREV32_8 (v16i8 MQPR:$src)))>;
+let Predicates = [HasMVEInt] in {
+  defm: MVE_VREV_basic_patterns<64, [MVE_v4i32, MVE_v4f32], MVE_VREV64_32>;
+  defm: MVE_VREV_basic_patterns<64, [MVE_v8i16, MVE_v8f16], MVE_VREV64_16>;
+  defm: MVE_VREV_basic_patterns<64, [MVE_v16i8           ], MVE_VREV64_8>;

-  def : Pat<(v16i8 (ARMvrev16 (v16i8 MQPR:$src))),
-            (v16i8 (MVE_VREV16_8 (v16i8 MQPR:$src)))>;
+  defm: MVE_VREV_basic_patterns<32, [MVE_v8i16, MVE_v8f16], MVE_VREV32_16>;
+  defm: MVE_VREV_basic_patterns<32, [MVE_v16i8           ], MVE_VREV32_8>;

-  def : Pat<(v4f32 (ARMvrev64 (v4f32 MQPR:$src))),
-            (v4f32 (MVE_VREV64_32 (v4f32 MQPR:$src)))>;
-  def : Pat<(v8f16 (ARMvrev64 (v8f16 MQPR:$src))),
-            (v8f16 (MVE_VREV64_16 (v8f16 MQPR:$src)))>;
-  def : Pat<(v8f16 (ARMvrev32 (v8f16 MQPR:$src))),
-            (v8f16 (MVE_VREV32_16 (v8f16 MQPR:$src)))>;
+  defm: MVE_VREV_basic_patterns<16, [MVE_v16i8           ], MVE_VREV16_8>;
 }

 def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm),
@@ -1356,14 +1357,14 @@
 }

 let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (vnotq (v16i8 MQPR:$val1))),
-            (v16i8 (MVE_VMVN (v16i8 MQPR:$val1)))>;
-  def : Pat<(v8i16 (vnotq (v8i16 MQPR:$val1))),
-            (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>;
-  def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))),
-            (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>;
-  def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))),
-            (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>;
+  foreach VTI = [ MVE_v16i8, MVE_v8i16, MVE_v4i32, MVE_v2i64 ] in {
+    def : Pat<(VTI.Vec (vnotq (VTI.Vec MQPR:$val1))),
+              (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1)))>;
+    def : Pat<(VTI.Vec (int_arm_mve_mvn_predicated (VTI.Vec MQPR:$val1),
+                  (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (MVE_VMVN (VTI.Vec MQPR:$val1), ARMVCCThen,
+                  (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+  }
 }

 class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28>
@@ -2175,39 +2176,43 @@
   let validForTailPredication = 1;
 }

-def MVE_VCLSs8  : MVE_VCLSCLZ<"vcls", "s8",  0b00, 0b0>;
-def MVE_VCLSs16 : MVE_VCLSCLZ<"vcls", "s16", 0b01, 0b0>;
-def MVE_VCLSs32 : MVE_VCLSCLZ<"vcls", "s32", 0b10, 0b0>;
+multiclass MVE_VCLSCLZ_p<string opname, bit opcode, MVEVectorVTInfo VTI,
+                         SDPatternOperator unpred_op> {
+  def "": MVE_VCLSCLZ<"v"#opname, VTI.Suffix, VTI.Size, opcode>;

-def MVE_VCLZs8  : MVE_VCLSCLZ<"vclz", "i8",  0b00, 0b1>;
-def MVE_VCLZs16 : MVE_VCLSCLZ<"vclz", "i16", 0b01, 0b1>;
-def MVE_VCLZs32 : MVE_VCLSCLZ<"vclz", "i32", 0b10, 0b1>;
+  defvar Inst = !cast<Instruction>(NAME);
+  defvar pred_int = !cast<Intrinsic>("int_arm_mve_"#opname#"_predicated");

-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 ( ctlz (v16i8 MQPR:$val1))),
-            (v16i8 ( MVE_VCLZs8 (v16i8 MQPR:$val1)))>;
-  def : Pat<(v4i32 ( ctlz (v4i32 MQPR:$val1))),
-            (v4i32 ( MVE_VCLZs32 (v4i32 MQPR:$val1)))>;
-  def : Pat<(v8i16 ( ctlz (v8i16 MQPR:$val1))),
-            (v8i16 ( MVE_VCLZs16 (v8i16 MQPR:$val1)))>;
-
-  def : Pat<(v16i8 ( int_arm_mve_vcls (v16i8 MQPR:$val1))),
-            (v16i8 ( MVE_VCLSs8 (v16i8 MQPR:$val1)))>;
-  def : Pat<(v4i32 ( int_arm_mve_vcls (v4i32 MQPR:$val1))),
-            (v4i32 ( MVE_VCLSs32 (v4i32 MQPR:$val1)))>;
-  def : Pat<(v8i16 ( int_arm_mve_vcls (v8i16 MQPR:$val1))),
-            (v8i16 ( MVE_VCLSs16 (v8i16 MQPR:$val1)))>;
+  let Predicates = [HasMVEInt] in {
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+                  (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+                  (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+  }
 }

+defm MVE_VCLSs8  : MVE_VCLSCLZ_p<"cls", 0, MVE_v16s8, int_arm_mve_vcls>;
+defm MVE_VCLSs16 : MVE_VCLSCLZ_p<"cls", 0, MVE_v8s16, int_arm_mve_vcls>;
+defm MVE_VCLSs32 : MVE_VCLSCLZ_p<"cls", 0, MVE_v4s32, int_arm_mve_vcls>;
+
+defm MVE_VCLZs8  : MVE_VCLSCLZ_p<"clz", 1, MVE_v16i8, ctlz>;
+defm MVE_VCLZs16 : MVE_VCLSCLZ_p<"clz", 1, MVE_v8i16, ctlz>;
+defm MVE_VCLZs32 : MVE_VCLSCLZ_p<"clz", 1, MVE_v4i32, ctlz>;
+
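Note the asymmetry in the defms above: CLZ is LLVM's generic ctlz node, while CLS (count leading sign bits) only exists as the target intrinsic int_arm_mve_vcls, which is why the multiclass takes an SDPatternOperator. An illustrative scalar model of VCLS on one 32-bit lane (cls32 is a hypothetical helper, assuming the usual arithmetic right shift of negative values; __builtin_clz is the GCC/Clang builtin):

#include <stdint.h>

// VCLS counts how many bits directly below the sign bit still equal it.
static int cls32(int32_t x) {
  uint32_t y = (uint32_t)(x ^ (x >> 31)); // leading sign bits become leading zeroes
  return y ? __builtin_clz(y) - 1 : 31;   // 0 and -1 both give the maximum, 31
}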
 class MVE_VABSNEG_int<string iname, string suffix, bits<2> size, bit negate,
-                      list<dag> pattern=[]>
+                      bit saturate, list<dag> pattern=[]>
   : MVEIntSingleSrc<iname, suffix, size, pattern> {

   let Inst{28} = 0b1;
   let Inst{25-23} = 0b111;
   let Inst{21-20} = 0b11;
-  let Inst{17-16} = 0b01;
-  let Inst{12-8} = 0b00011;
+  let Inst{17} = 0b0;
+  let Inst{16} = !eq(saturate, 0);
+  let Inst{12-11} = 0b00;
+  let Inst{10} = saturate;
+  let Inst{9-8} = 0b11;
   let Inst{7} = negate;
   let Inst{6} = 0b1;
   let Inst{4} = 0b0;
@@ -2215,61 +2220,40 @@
   let validForTailPredication = 1;
 }

-def MVE_VABSs8  : MVE_VABSNEG_int<"vabs", "s8",  0b00, 0b0>;
-def MVE_VABSs16 : MVE_VABSNEG_int<"vabs", "s16", 0b01, 0b0>;
-def MVE_VABSs32 : MVE_VABSNEG_int<"vabs", "s32", 0b10, 0b0>;
-
-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (abs (v16i8 MQPR:$v))),
-            (v16i8 (MVE_VABSs8 $v))>;
-  def : Pat<(v8i16 (abs (v8i16 MQPR:$v))),
-            (v8i16 (MVE_VABSs16 $v))>;
-  def : Pat<(v4i32 (abs (v4i32 MQPR:$v))),
-            (v4i32 (MVE_VABSs32 $v))>;
-}
+multiclass MVE_VABSNEG_int_m<string iname, bit negate, bit saturate,
+                             SDPatternOperator unpred_op, Intrinsic pred_int,
+                             MVEVectorVTInfo VTI> {
+  def "" : MVE_VABSNEG_int<iname, VTI.Suffix, VTI.Size, negate, saturate>;
+  defvar Inst = !cast<Instruction>(NAME);

-def MVE_VNEGs8  : MVE_VABSNEG_int<"vneg", "s8",  0b00, 0b1>;
-def MVE_VNEGs16 : MVE_VABSNEG_int<"vneg", "s16", 0b01, 0b1>;
-def MVE_VNEGs32 : MVE_VABSNEG_int<"vneg", "s32", 0b10, 0b1>;
+  let Predicates = [HasMVEInt] in {
+    // VQABS and VQNEG have more difficult isel patterns defined elsewhere
+    if !eq(saturate, 0) then {
+      def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>;
+    }

-let Predicates = [HasMVEInt] in {
-  def : Pat<(v16i8 (vnegq (v16i8 MQPR:$v))),
-            (v16i8 (MVE_VNEGs8 $v))>;
-  def : Pat<(v8i16 (vnegq (v8i16 MQPR:$v))),
-            (v8i16 (MVE_VNEGs16 $v))>;
-  def : Pat<(v4i32 (vnegq (v4i32 MQPR:$v))),
-            (v4i32 (MVE_VNEGs32 $v))>;
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask),
+                  (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>;
+  }
 }

-class MVE_VQABSNEG<string iname, string suffix, bits<2> size,
-                   bit negate, list<dag> pattern=[]>
-  : MVEIntSingleSrc<iname, suffix, size, pattern> {
-
-  let Inst{28} = 0b1;
-  let Inst{25-23} = 0b111;
-  let Inst{21-20} = 0b11;
-  let Inst{17-16} = 0b00;
-  let Inst{12-8} = 0b00111;
-  let Inst{7} = negate;
-  let Inst{6} = 0b1;
-  let Inst{4} = 0b0;
-  let Inst{0} = 0b0;
-  let validForTailPredication = 1;
+foreach VTI = [ MVE_v16s8, MVE_v8s16, MVE_v4s32 ] in {
+  defm "MVE_VABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+     "vabs", 0, 0, abs, int_arm_mve_abs_predicated, VTI>;
+  defm "MVE_VQABS" # VTI.Suffix : MVE_VABSNEG_int_m<
+     "vqabs", 0, 1, ?, int_arm_mve_qabs_predicated, VTI>;
+  defm "MVE_VNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+     "vneg", 1, 0, vnegq, int_arm_mve_neg_predicated, VTI>;
+  defm "MVE_VQNEG" # VTI.Suffix : MVE_VABSNEG_int_m<
+     "vqneg", 1, 1, ?, int_arm_mve_qneg_predicated, VTI>;
 }

-def MVE_VQABSs8  : MVE_VQABSNEG<"vqabs", "s8",  0b00, 0b0>;
-def MVE_VQABSs16 : MVE_VQABSNEG<"vqabs", "s16", 0b01, 0b0>;
-def MVE_VQABSs32 : MVE_VQABSNEG<"vqabs", "s32", 0b10, 0b0>;
-
-def MVE_VQNEGs8  : MVE_VQABSNEG<"vqneg", "s8",  0b00, 0b1>;
-def MVE_VQNEGs16 : MVE_VQABSNEG<"vqneg", "s16", 0b01, 0b1>;
-def MVE_VQNEGs32 : MVE_VQABSNEG<"vqneg", "s32", 0b10, 0b1>;
-
 // int_min/int_max: vector containing INT_MIN/INT_MAX VTI.Size times
 // zero_vec: v4i32-initialized zero vector, potentially wrapped in a bitconvert
 multiclass vqabsneg_pattern<MVEVectorVTInfo VTI, dag int_min, dag int_max,
-                            dag zero_vec, list<dag> pattern=[]> {
+                            dag zero_vec, MVE_VABSNEG_int vqabs_instruction,
+                            MVE_VABSNEG_int vqneg_instruction> {
   let Predicates = [HasMVEInt] in {
     // The below tree can be replaced by a vqabs instruction, as it represents
     // the following vectorized expression (r being the value in $reg):
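The expression that comment refers to, written out as scalar C for one 32-bit lane (an illustrative sketch; sat_abs and sat_neg are not names from the patch):

#include <stdint.h>

// VQABS: ordinary abs, except INT32_MIN, whose negation is not
// representable in two's complement, saturates to INT32_MAX.
static int32_t sat_abs(int32_t r) {
  return r > 0 ? r : (r == INT32_MIN ? INT32_MAX : -r);
}

// VQNEG likewise saturates the one overflowing input of negation.
static int32_t sat_neg(int32_t r) {
  return r == INT32_MIN ? INT32_MAX : -r;
}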
@@ -2470,7 +2454,7 @@
   let Inst{3-1} = Qm{2-0};
 }

-class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U,
+class MVE_VMOVL<string iname, string suffix, bits<2> sz, bit U, bit top,
                 list<dag> pattern=[]>
   : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm),
                   iname, suffix, "$Qd, $Qm", vpred_r, "",
@@ -2480,25 +2464,35 @@
   let Inst{21} = 0b1;
   let Inst{20-19} = sz{1-0};
   let Inst{18-16} = 0b000;
+  let Inst{12} = top;
   let Inst{11-6} = 0b111101;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
 }

-multiclass MVE_VMOVL_shift_half<string iname, string suffix, bits<2> sz, bit U,
-                                list<dag> pattern=[]> {
-  def bh : MVE_VMOVL<iname # "b", suffix, sz, U> {
-    let Inst{12} = 0b0;
-  }
-  def th : MVE_VMOVL<iname # "t", suffix, sz, U> {
-    let Inst{12} = 0b1;
-  }
+multiclass MVE_VMOVL_m<bit top, string chr, MVEVectorVTInfo OutVTI,
+                       MVEVectorVTInfo InVTI> {
+  def "": MVE_VMOVL<"vmovl" # chr, InVTI.Suffix, OutVTI.Size,
+                    InVTI.Unsigned, top>;
+  defvar Inst = !cast<Instruction>(NAME);
+
+  def : Pat<(OutVTI.Vec (int_arm_mve_vmovl_predicated (InVTI.Vec MQPR:$src),
+                            (i32 InVTI.Unsigned), (i32 top),
+                            (OutVTI.Pred VCCR:$pred),
+                            (OutVTI.Vec MQPR:$inactive))),
+            (OutVTI.Vec (Inst (InVTI.Vec MQPR:$src), ARMVCCThen,
+                            (OutVTI.Pred VCCR:$pred),
+                            (OutVTI.Vec MQPR:$inactive)))>;
 }

-defm MVE_VMOVLs8  : MVE_VMOVL_shift_half<"vmovl", "s8",  0b01, 0b0>;
-defm MVE_VMOVLu8  : MVE_VMOVL_shift_half<"vmovl", "u8",  0b01, 0b1>;
-defm MVE_VMOVLs16 : MVE_VMOVL_shift_half<"vmovl", "s16", 0b10, 0b0>;
-defm MVE_VMOVLu16 : MVE_VMOVL_shift_half<"vmovl", "u16", 0b10, 0b1>;
+defm MVE_VMOVLs8bh  : MVE_VMOVL_m<0, "b", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLs8th  : MVE_VMOVL_m<1, "t", MVE_v8s16, MVE_v16s8>;
+defm MVE_VMOVLu8bh  : MVE_VMOVL_m<0, "b", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLu8th  : MVE_VMOVL_m<1, "t", MVE_v8u16, MVE_v16u8>;
+defm MVE_VMOVLs16bh : MVE_VMOVL_m<0, "b", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLs16th : MVE_VMOVL_m<1, "t", MVE_v4s32, MVE_v8s16>;
+defm MVE_VMOVLu16bh : MVE_VMOVL_m<0, "b", MVE_v4u32, MVE_v8u16>;
+defm MVE_VMOVLu16th : MVE_VMOVL_m<1, "t", MVE_v4u32, MVE_v8u16>;

 let Predicates = [HasMVEInt] in {
   def : Pat<(sext_inreg (v4i32 MQPR:$src), v4i16),
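The bh/th pair of the old multiclass becomes an explicit top bit: VMOVLB widens the even-numbered (bottom) source lanes and VMOVLT the odd-numbered (top) ones. A scalar sketch of the s8-to-s16 case (illustrative C only; the extension is sign or zero according to the element type):

#include <stdint.h>

// Lane i of the widened result is source lane 2*i+top, sign-extended.
static void vmovl_s8_model(int16_t dst[8], const int8_t src[16], int top) {
  for (int i = 0; i < 8; i++)
    dst[i] = (int16_t)src[2 * i + top];
}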
@@ -3277,45 +3271,34 @@
 }

-multiclass MVE_VRINT_ops<string suffix, bits<2> size, list<dag> pattern=[]> {
-  def N : MVE_VRINT<"n", 0b000, suffix, size, pattern>;
-  def X : MVE_VRINT<"x", 0b001, suffix, size, pattern>;
-  def A : MVE_VRINT<"a", 0b010, suffix, size, pattern>;
-  def Z : MVE_VRINT<"z", 0b011, suffix, size, pattern>;
-  def M : MVE_VRINT<"m", 0b101, suffix, size, pattern>;
-  def P : MVE_VRINT<"p", 0b111, suffix, size, pattern>;
-}
+multiclass MVE_VRINT_m<MVEVectorVTInfo VTI, string suffix, bits<3> opcode,
+                       SDPatternOperator unpred_op> {
+  def "": MVE_VRINT<suffix, opcode, VTI.Suffix, VTI.Size>;
+  defvar Inst = !cast<Instruction>(NAME);
+  defvar pred_int = !cast<Intrinsic>("int_arm_mve_vrint"#suffix#"_predicated");

-defm MVE_VRINTf16 : MVE_VRINT_ops<"f16", 0b01>;
-defm MVE_VRINTf32 : MVE_VRINT_ops<"f32", 0b10>;
+  let Predicates = [HasMVEFloat] in {
+    def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$val))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$val)))>;
+    def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$val), (VTI.Pred VCCR:$pred),
+                  (VTI.Vec MQPR:$inactive))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$val), ARMVCCThen,
+                  (VTI.Pred VCCR:$pred), (VTI.Vec MQPR:$inactive)))>;
+  }
+}

-let Predicates = [HasMVEFloat] in {
-  def : Pat<(v4f32 (frint (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32X (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (frint (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16X (v8f16 MQPR:$val1)))>;
-  def : Pat<(v4f32 (fround (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32A (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (fround (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16A (v8f16 MQPR:$val1)))>;
-  def : Pat<(v4f32 (ftrunc (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32Z (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (ftrunc (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16Z (v8f16 MQPR:$val1)))>;
-  def : Pat<(v4f32 (ffloor (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32M (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (ffloor (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16M (v8f16 MQPR:$val1)))>;
-  def : Pat<(v4f32 (fceil (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32P (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (fceil (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16P (v8f16 MQPR:$val1)))>;
-  def : Pat<(v4f32 (int_arm_mve_vrintn (v4f32 MQPR:$val1))),
-            (v4f32 (MVE_VRINTf32N (v4f32 MQPR:$val1)))>;
-  def : Pat<(v8f16 (int_arm_mve_vrintn (v8f16 MQPR:$val1))),
-            (v8f16 (MVE_VRINTf16N (v8f16 MQPR:$val1)))>;
+multiclass MVE_VRINT_ops<MVEVectorVTInfo VTI> {
+  defm N : MVE_VRINT_m<VTI, "n", 0b000, int_arm_mve_vrintn>;
+  defm X : MVE_VRINT_m<VTI, "x", 0b001, frint>;
+  defm A : MVE_VRINT_m<VTI, "a", 0b010, fround>;
+  defm Z : MVE_VRINT_m<VTI, "z", 0b011, ftrunc>;
+  defm M : MVE_VRINT_m<VTI, "m", 0b101, ffloor>;
+  defm P : MVE_VRINT_m<VTI, "p", 0b111, fceil>;
 }

+defm MVE_VRINTf16 : MVE_VRINT_ops<MVE_v8f16>;
+defm MVE_VRINTf32 : MVE_VRINT_ops<MVE_v4f32>;
+
 class MVEFloatArithNeon<string iname, string suffix, bit size,
                         dag oops, dag iops, string ops,
                         vpred_ops vpred, string cstr, list<dag> pattern=[]>
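The six mode letters map onto the C rounding functions exactly as the replaced patterns indicate: n is round to nearest with ties to even (int_arm_mve_vrintn), x rounds in the current mode and signals inexact (frint/rintf), a is ties away from zero (fround/roundf), z is toward zero (ftrunc/truncf), m is toward minus infinity (ffloor/floorf), and p is toward plus infinity (fceil/ceilf). A small worked illustration (sketch only; roundevenf for the n mode is only standard since C23, so it is omitted):

#include <math.h>

static void rounding_examples(void) {
  float r[4] = { roundf(2.5f),   // a: 3.0f, ties away from zero
                 truncf(-1.7f),  // z: -1.0f
                 floorf(-1.7f),  // m: -2.0f
                 ceilf(-1.7f) }; // p: -1.0f
  (void)r;
}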
MQPR:$src)))>; - def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))), - (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>; - def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))), - (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>; -} +defm MVE_VCVTf16s16n : MVE_VCVT_fp_int_m; +defm MVE_VCVTf16u16n : MVE_VCVT_fp_int_m; +defm MVE_VCVTf32s32n : MVE_VCVT_fp_int_m; +defm MVE_VCVTf32u32n : MVE_VCVT_fp_int_m; class MVE_VABSNEG_fp size, bit negate, list pattern=[]> @@ -3761,26 +3746,29 @@ let validForTailPredication = 1; } -def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>; -def MVE_VABSf32 : MVE_VABSNEG_fp<"vabs", "f32", 0b10, 0b0>; - -let Predicates = [HasMVEFloat] in { - def : Pat<(v8f16 (fabs MQPR:$src)), - (MVE_VABSf16 MQPR:$src)>; - def : Pat<(v4f32 (fabs MQPR:$src)), - (MVE_VABSf32 MQPR:$src)>; -} +multiclass MVE_VABSNEG_fp_m { + def "" : MVE_VABSNEG_fp; + defvar Inst = !cast(NAME); -def MVE_VNEGf16 : MVE_VABSNEG_fp<"vneg", "f16", 0b01, 0b1>; -def MVE_VNEGf32 : MVE_VABSNEG_fp<"vneg", "f32", 0b10, 0b1>; + let Predicates = [HasMVEInt] in { + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$v))), (VTI.Vec (Inst $v))>; -let Predicates = [HasMVEFloat] in { - def : Pat<(v8f16 (fneg MQPR:$src)), - (MVE_VNEGf16 MQPR:$src)>; - def : Pat<(v4f32 (fneg MQPR:$src)), - (MVE_VNEGf32 MQPR:$src)>; + def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$v), (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive))), + (VTI.Vec (Inst $v, ARMVCCThen, $mask, $inactive))>; + } } +defm MVE_VABSf16 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated, + MVE_v8f16, 0>; +defm MVE_VABSf32 : MVE_VABSNEG_fp_m<"vabs", fabs, int_arm_mve_abs_predicated, + MVE_v4f32, 0>; +defm MVE_VNEGf16 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated, + MVE_v8f16, 1>; +defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated, + MVE_v4f32, 1>; + class MVE_VMAXMINNMA pattern=[]> : MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm), @@ -4427,23 +4415,42 @@ defm MVE_VQMOVUNs32 : MVE_VxMOVxN_halves<"vqmovun", "s32", 0b0, 0b0, 0b01>; def MVEvmovn : SDNode<"ARMISD::VMOVN", SDTARMVEXT>; -let Predicates = [HasMVEInt] in { - def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 0))), - (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; - def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))), - (v8i16 (MVE_VMOVNi32th (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; - def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 0))), - (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; - def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm), (i32 1))), - (v16i8 (MVE_VMOVNi16th (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; - - def : Pat<(v8i16 (MVEvmovn (v8i16 MQPR:$Qm), - (v8i16 (ARMvrev32 MQPR:$Qd_src)), (i32 1))), - (v8i16 (MVE_VMOVNi32bh (v8i16 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>; - def : Pat<(v16i8 (MVEvmovn (v16i8 MQPR:$Qm), - (v16i8 (ARMvrev16 MQPR:$Qd_src)), (i32 1))), - (v16i8 (MVE_VMOVNi16bh (v16i8 MQPR:$Qd_src), (v16i8 MQPR:$Qm)))>; -} + +multiclass MVE_VMOVN_p { + // Match the most obvious MVEvmovn(a,b,t), which overwrites the odd or even + // lanes of a (depending on t) with the even lanes of b. + def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qd_src), + (VTI.Vec MQPR:$Qm), (i32 top))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>; + + if !eq(top, 0) then { + // If we see MVEvmovn(a,ARMvrev(b),1), that wants to overwrite the odd + // lanes of a with the odd lanes of b. 
In other words, the lanes we're + // _keeping_ from a are the even ones. So we can flip it round and say that + // this is the same as overwriting the even lanes of b with the even lanes + // of a, i.e. it's a VMOVNB with the operands reversed. + defvar vrev = !cast("ARMvrev" # InVTI.LaneBits); + def : Pat<(VTI.Vec (MVEvmovn (VTI.Vec MQPR:$Qm), + (VTI.Vec (vrev MQPR:$Qd_src)), (i32 1))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), (VTI.Vec MQPR:$Qm)))>; + } + + // Match the IR intrinsic for a predicated VMOVN. This regards the Qm input + // as having wider lanes that we're narrowing, instead of already-narrow + // lanes that we're taking every other one of. + def : Pat<(VTI.Vec (int_arm_mve_vmovn_predicated (VTI.Vec MQPR:$Qd_src), + (InVTI.Vec MQPR:$Qm), (i32 top), + (InVTI.Pred VCCR:$pred))), + (VTI.Vec (Inst (VTI.Vec MQPR:$Qd_src), + (InVTI.Vec MQPR:$Qm), + ARMVCCThen, (InVTI.Pred VCCR:$pred)))>; +} + +defm : MVE_VMOVN_p; +defm : MVE_VMOVN_p; +defm : MVE_VMOVN_p; +defm : MVE_VMOVN_p; class MVE_VCVT_ff @test_vmvnq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vmvnq_m_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_u32(<4 x i32> %inactive, <4 x 
i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vmvnq_m_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vmvnt q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vnegq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vnegq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vnegt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vnegq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vnegq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vnegt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vnegq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vnegq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vnegt.s8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vnegq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vnegq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vnegt.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vnegq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vnegq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vnegt.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vabsq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vabsq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabst.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vabsq_m_f32(<4 x float> %inactive, <4 x float> %a, 
i16 zeroext %p) { +; CHECK-LABEL: test_vabsq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabst.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vabsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vabsq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabst.s8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vabsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vabsq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabst.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vabsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vabsq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vabst.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqnegq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vqnegq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqnegt.s8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqnegq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vqnegq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqnegt.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vqnegq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vqnegq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqnegt.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vqabsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { 
+; CHECK-LABEL: test_vqabsq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqabst.s8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vqabsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vqabsq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqabst.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vqabsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vqabsq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vqabst.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) + +declare <16 x i8> @llvm.arm.mve.mvn.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.mvn.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.mvn.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) +declare <8 x half> @llvm.arm.mve.neg.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.neg.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <16 x i8> @llvm.arm.mve.neg.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.neg.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.neg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) +declare <8 x half> @llvm.arm.mve.abs.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.abs.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <16 x i8> @llvm.arm.mve.abs.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.abs.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.abs.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) +declare <16 x i8> @llvm.arm.mve.qneg.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.qneg.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.qneg.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) +declare <16 x i8> @llvm.arm.mve.qabs.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.qabs.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.qabs.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vclzcls-predicated.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vclzcls-predicated.ll new file mode 100644 --- /dev/null +++ 
b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vclzcls-predicated.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vclsq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclsq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclst.s8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vclsq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclsq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclst.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vclsq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclsq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclst.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vclzq_m_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclzq_m_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vclzq_m_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclzq_m_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vclzq_m_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclzq_m_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vclzq_m_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclzq_m_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = 
zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8> %a, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vclzq_m_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclzq_m_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16> %a, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vclzq_m_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vclzq_m_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vclzt.i32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32> %a, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <16 x i8> @llvm.arm.mve.cls.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.cls.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.cls.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) +declare <16 x i8> @llvm.arm.mve.clz.predicated.v16i8.v16i1(<16 x i8>, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.clz.predicated.v8i16.v8i1(<8 x i16>, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.clz.predicated.v4i32.v4i1(<4 x i32>, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt-fp-int.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt-fp-int.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vcvt-fp-int.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_f16_s16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_f16_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.f16.s16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> %a, i32 0, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vcvtq_m_f16_u16(<8 x half> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_f16_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.f16.u16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16> %a, i32 1, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_f32_s32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) { +; 
CHECK-LABEL: test_vcvtq_m_f32_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.f32.s32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vcvtq_m_f32_u32(<4 x float> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_f32_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.f32.u32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32> %a, i32 1, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_s16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_s16_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.s16.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> %a, i32 0, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_s32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_s32_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.s32.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vcvtq_m_u16_f16(<8 x i16> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_u16_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.u16.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half> %a, i32 1, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vcvtq_m_u32_f32(<4 x i32> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vcvtq_m_u32_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vcvtt.u32.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float> %a, i32 1, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) + +declare <8 x half> @llvm.arm.mve.vcvt.fp.int.predicated.v8f16.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vcvt.fp.int.predicated.v4f32.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x float>) +declare <8 x i16> @llvm.arm.mve.vcvt.fp.int.predicated.v8i16.v8f16.v8i1(<8 x half>, i32, <8 x 
i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.vcvt.fp.int.predicated.v4i32.v4f32.v4i1(<4 x float>, i32, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovl.ll @@ -145,3 +145,200 @@ %1 = zext <4 x i16> %0 to <4 x i32> ret <4 x i32> %1 } + +define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovlbq_m_s8: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovlbt.s8 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovlbq_m_s8: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q0 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovlbt.s8 q2, q0 +; BE-NEXT: vrev64.16 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 0, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovlbq_m_s16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovlbt.s16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovlbq_m_s16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q0 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovlbt.s16 q2, q0 +; BE-NEXT: vrev64.32 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 0, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovlbq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovlbq_m_u8: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovlbt.u8 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovlbq_m_u8: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q0 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovlbt.u8 q2, q0 +; BE-NEXT: vrev64.16 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 0, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmovlbq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovlbq_m_u16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovlbt.u16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovlbq_m_u16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q0 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovlbt.u16 q2, q0 +; BE-NEXT: vrev64.32 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 0, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { +; LE-LABEL: 
test_vmovltq_m_s8: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovltt.s8 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovltq_m_s8: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q0 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovltt.s8 q2, q0 +; BE-NEXT: vrev64.16 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 0, i32 1, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovltq_m_s16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovltt.s16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovltq_m_s16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q0 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovltt.s16 q2, q0 +; BE-NEXT: vrev64.32 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 0, i32 1, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovltq_m_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovltq_m_u8: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovltt.u8 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovltq_m_u8: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q0 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovltt.u8 q2, q0 +; BE-NEXT: vrev64.16 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8> %a, i32 1, i32 1, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vmovltq_m_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) { +; LE-LABEL: test_vmovltq_m_u16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovltt.u16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovltq_m_u16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q0 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovltt.u16 q2, q0 +; BE-NEXT: vrev64.32 q0, q2 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16> %a, i32 1, i32 1, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <8 x i16> @llvm.arm.mve.vmovl.predicated.v8i16.v16i8.v8i1(<16 x i8>, i32, i32, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.vmovl.predicated.v4i32.v8i16.v4i1(<8 x i16>, i32, i32, <4 x i1>, <4 x i32>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vmovn.ll @@ -166,5 +166,201 @@ ret <8 x i16> %2 } +define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { 
+; LE-LABEL: test_vmovnbq_m_s16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovnbt.i16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovnbq_m_s16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q1 +; BE-NEXT: vrev64.8 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovnbt.i16 q1, q2 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovnbq_m_s32: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovnbt.i32 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovnbq_m_s32: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q1 +; BE-NEXT: vrev64.16 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovnbt.i32 q1, q2 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vmovnbq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovnbq_m_u16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovnbt.i16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovnbq_m_u16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q1 +; BE-NEXT: vrev64.8 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovnbt.i16 q1, q2 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 0, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovnbq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovnbq_m_u32: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovnbt.i32 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovnbq_m_u32: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q1 +; BE-NEXT: vrev64.16 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovnbt.i32 q1, q2 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 0, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_s16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovntq_m_s16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovntt.i16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovntq_m_s16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q1 +; BE-NEXT: vrev64.8 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovntt.i16 q1, q2 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define 
arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_s32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovntq_m_s32: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovntt.i32 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovntq_m_s32: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q1 +; BE-NEXT: vrev64.16 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovntt.i32 q1, q2 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vmovntq_m_u16(<16 x i8> %a, <8 x i16> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovntq_m_u16: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovntt.i16 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovntq_m_u16: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.16 q2, q1 +; BE-NEXT: vrev64.8 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovntt.i16 q1, q2 +; BE-NEXT: vrev64.8 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8> %a, <8 x i16> %b, i32 1, <8 x i1> %1) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vmovntq_m_u32(<8 x i16> %a, <4 x i32> %b, i16 zeroext %p) { +; LE-LABEL: test_vmovntq_m_u32: +; LE: @ %bb.0: @ %entry +; LE-NEXT: vmsr p0, r0 +; LE-NEXT: vpst +; LE-NEXT: vmovntt.i32 q0, q1 +; LE-NEXT: bx lr +; +; BE-LABEL: test_vmovntq_m_u32: +; BE: @ %bb.0: @ %entry +; BE-NEXT: vrev64.32 q2, q1 +; BE-NEXT: vrev64.16 q1, q0 +; BE-NEXT: vmsr p0, r0 +; BE-NEXT: vpst +; BE-NEXT: vmovntt.i32 q1, q2 +; BE-NEXT: vrev64.16 q0, q1 +; BE-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16> %a, <4 x i32> %b, i32 1, <4 x i1> %1) + ret <8 x i16> %2 +} + declare <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8>) declare <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16>) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <16 x i8> @llvm.arm.mve.vmovn.predicated.v16i8.v8i16.v8i1(<16 x i8>, <8 x i16>, i32, <8 x i1>) +declare <8 x i16> @llvm.arm.mve.vmovn.predicated.v8i16.v4i32.v4i1(<8 x i16>, <4 x i32>, i32, <4 x i1>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrev.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrev.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrev.ll @@ -0,0 +1,138 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vrev16q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev16q_m_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev16t.8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 16, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define 
arm_aapcs_vfpcc <16 x i8> @test_vrev32q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev32q_m_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev32t.8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 32, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrev32q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev32q_m_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev32t.16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 32, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrev32q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev32q_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev32t.16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 32, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vrev64q_m_i8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev64q_m_i8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev64t.8 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8> %a, i32 64, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vrev64q_m_i16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev64q_m_i16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev64t.16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16> %a, i32 64, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrev64q_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev64q_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev64t.16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half> %a, i32 64, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vrev64q_m_i32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev64q_m_i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev64t.32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32> %a, 
i32 64, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrev64q_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrev64q_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrev64t.32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float> %a, i32 64, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) + +declare <16 x i8> @llvm.arm.mve.vrev.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.vrev.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>) +declare <8 x half> @llvm.arm.mve.vrev.predicated.v8f16.v8i1(<8 x half>, i32, <8 x i1>, <8 x half>) +declare <4 x i32> @llvm.arm.mve.vrev.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>) +declare <4 x float> @llvm.arm.mve.vrev.predicated.v4f32.v4i1(<4 x float>, i32, <4 x i1>, <4 x float>) diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrint-predicated.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrint-predicated.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vrint-predicated.ll @@ -0,0 +1,185 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <8 x half> @test_vrndaq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndaq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintat.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndaq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndaq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintat.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndmq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndmq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintmt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndmq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndmq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintmt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = 
tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndnq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndnq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintnt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndnq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndnq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintnt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndpq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndpq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintpt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndpq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndpq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintpt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintzt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintzt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +define arm_aapcs_vfpcc <8 x half> @test_vrndxq_m_f16(<8 x half> %inactive, <8 x half> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndxq_m_f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: 
vrintxt.f16 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half> %a, <8 x i1> %1, <8 x half> %inactive) + ret <8 x half> %2 +} + +define arm_aapcs_vfpcc <4 x float> @test_vrndxq_m_f32(<4 x float> %inactive, <4 x float> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vrndxq_m_f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vrintxt.f32 q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float> %a, <4 x i1> %1, <4 x float> %inactive) + ret <4 x float> %2 +} + +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) +declare <8 x half> @llvm.arm.mve.vrinta.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrinta.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.arm.mve.vrintm.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrintm.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.arm.mve.vrintn.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrintn.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.arm.mve.vrintp.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrintp.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.arm.mve.vrintz.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrintz.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>) +declare <8 x half> @llvm.arm.mve.vrintx.predicated.v8f16.v8i1(<8 x half>, <8 x i1>, <8 x half>) +declare <4 x float> @llvm.arm.mve.vrintx.predicated.v4f32.v4i1(<4 x float>, <4 x i1>, <4 x float>)
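
A note on what the `_m' tests above are actually checking. MVE predication is per byte lane: the 16 bits of an mve_pred16_t correspond to the 16 bytes of a q-register, so a 16- or 32-bit lane is governed by the bits covering its bytes, and the `inactive' operand supplies every lane whose predicate bits are clear (that is what the `vpst' + `vmvnt' pairs in the CHECK lines encode). None of the following C is part of the patch; it is a minimal scalar reference model for the 8-bit case, with a made-up helper name:

#include <stdint.h>

/* Scalar model of vmvnq_m_s8: lane i takes ~a[i] when predicate bit i is
   set, and inactive[i] otherwise. */
static void vmvnq_m_s8_ref(int8_t r[16], const int8_t inactive[16],
                           const int8_t a[16], uint16_t p)
{
    for (int i = 0; i < 16; i++)
        r[i] = ((p >> i) & 1) ? (int8_t)~a[i] : inactive[i];
}

Every unary intrinsic in this patch (vneg, vabs, vqneg, vqabs, vcls, vclz, the vrint family, vcvt) follows the same shape with a different per-lane function, which is why the tests are so uniform.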
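The vmovn tests encode the top/bottom distinction discussed in the MVE_VMOVN_p comment. Again not from the patch: a little-endian-only C sketch of the intrinsic's view, in which Qm holds wide lanes being narrowed into alternating lanes of Qd (predication omitted for brevity):

#include <stdint.h>
#include <string.h>

/* Model of vmovntq_s16: truncate each 16-bit lane of b and write it to the
   odd ("top") byte lanes of a; the vmovnb form writes the even ("bottom")
   byte lanes instead. The untouched lanes keep their values from a, which
   is why VMOVNT/VMOVNB read q0 as well as writing it. */
static void vmovntq_s16_ref(int8_t r[16], const int8_t a[16],
                            const int16_t b[8])
{
    memcpy(r, a, 16);
    for (int i = 0; i < 8; i++)
        r[2 * i + 1] = (int8_t)b[i];   /* 2 * i here gives the vmovnb form */
}

The BE check lines differ from LE only by the vrev64 fix-ups required for the big-endian vector layout; the lane arithmetic being tested is identical.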
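Finally, the vrev tests: the i32 argument to llvm.arm.mve.vrev.predicated (16, 32 or 64 in the tests above) is the width in bits of the group whose elements are reversed. A non-normative scalar model of the unpredicated vrev32 case on byte lanes:

#include <stdint.h>

/* Model of vrev32q on 8-bit lanes: reverse the order of the four bytes
   inside every 32-bit group; vrev16 and vrev64 do the same within 16-bit
   and 64-bit groups. */
static void vrev32q_i8_ref(int8_t r[16], const int8_t a[16])
{
    for (int g = 0; g < 4; g++)            /* four 32-bit groups */
        for (int b = 0; b < 4; b++)
            r[4 * g + b] = a[4 * g + (3 - b)];
}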