diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -627,6 +627,47 @@
   }
 }
 
+let params = T.Int in {
+  def vqshlq_n: Intrinsic<Vector, (args Vector:$v, imm_0toNm1:$sh),
+      (IRInt<"vqshl_imm", [Vector]> $v, $sh, (unsignedflag Scalar))>;
+  def vqshlq_m_n: Intrinsic<Vector, (args Vector:$inactive, Vector:$v,
+                                          imm_0toNm1:$sh, Predicate:$pred),
+      (IRInt<"vqshl_imm_predicated", [Vector, Predicate]>
+           $v, $sh, (unsignedflag Scalar), $pred, $inactive)>;
+
+  let pnt = PNT_NType in {
+    def vrshrq_n: Intrinsic<Vector, (args Vector:$v, imm_1toN:$sh),
+        (IRInt<"vrshr_imm", [Vector]> $v, $sh, (unsignedflag Scalar))>;
+    defm vrshrq: IntrinsicMX<Vector, (args Vector:$v, imm_1toN:$sh),
+        (IRInt<"vrshr_imm_predicated", [Vector, Predicate]>
+             $v, $sh, (unsignedflag Scalar), $pred, $inactive), "_n">;
+  }
+}
+
+let params = T.Signed, pnt = PNT_NType in {
+  def vqshluq_n: Intrinsic<UVector, (args Vector:$v, imm_0toNm1:$sh),
+      (IRInt<"vqshlu_imm", [Vector]> $v, $sh)>;
+  def vqshluq_m_n: Intrinsic<UVector, (args UVector:$inactive, Vector:$v,
+                                            imm_0toNm1:$sh, Predicate:$pred),
+      (IRInt<"vqshlu_imm_predicated", [Vector, Predicate]>
+           $v, $sh, $pred, $inactive)>;
+}
+
+multiclass vshll_imm<int top> {
+  let params = !listconcat(T.Int8, T.Int16), pnt = PNT_NType in {
+    def _n: Intrinsic<DblVector, (args Vector:$v, imm_1toN:$sh),
+        (IRInt<"vshll_imm", [DblVector, Vector]>
+             $v, $sh, (unsignedflag Scalar), top)>;
+    defm "": IntrinsicMX<DblVector, (args Vector:$v, imm_1toN:$sh),
+        (IRInt<"vshll_imm_predicated", [DblVector, Vector, Predicate]>
+             $v, $sh, (unsignedflag Scalar), top, $pred, $inactive), "_n">;
+  }
+}
+defm vshllbq : vshll_imm<0>;
+defm vshlltq : vshll_imm<1>;
+
 // Base class for the scalar shift intrinsics.
 class ScalarShift<Type argtype, dag shiftCountArg, dag shiftCodeGen>:
   Intrinsic<argtype, !con((args argtype:$value), shiftCountArg), shiftCodeGen> {
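For reference while reading the tests below: vqshlq_n is a saturating left
shift that clamps the result to the lane type's range, and vqshluq_n shifts a
signed input and saturates to the corresponding unsigned type. A one-lane
scalar model of those semantics (a sketch written for this note, not part of
the patch; the helper names are invented):

    #include <stdint.h>

    /* Model of one int8_t lane of vqshlq_n_s8: shift left by sh (0..7),
     * saturating to the int8_t range. Multiplication avoids the C
     * undefined behaviour of left-shifting a negative value. */
    static int8_t model_vqshl_n_s8(int8_t x, unsigned sh)
    {
        int32_t wide = x * (1 << sh);      /* cannot overflow 32 bits */
        if (wide > INT8_MAX) return INT8_MAX;
        if (wide < INT8_MIN) return INT8_MIN;
        return (int8_t)wide;
    }

    /* Model of one lane of vqshluq_n_s8: signed input, result saturated
     * to the unsigned range, so negative inputs clamp to zero. */
    static uint8_t model_vqshlu_n_s8(int8_t x, unsigned sh)
    {
        int32_t wide = x * (1 << sh);
        if (wide < 0) return 0;
        if (wide > UINT8_MAX) return UINT8_MAX;
        return (uint8_t)wide;
    }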
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
--- a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
+++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c
@@ -720,3 +720,918 @@
     return vshrq_x_n_u32(a, 6, p);
 #endif /* POLYMORPHIC */
 }
+
+// CHECK-LABEL: @test_vqshlq_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> [[A:%.*]], i32 3, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vqshlq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_n(a, 3);
+#else /* POLYMORPHIC */
+    return vqshlq_n_s8(a, 3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> [[A:%.*]], i32 4, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vqshlq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_n(a, 4);
+#else /* POLYMORPHIC */
+    return vqshlq_n_s16(a, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> [[A:%.*]], i32 4, i32 0)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vqshlq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_n(a, 4);
+#else /* POLYMORPHIC */
+    return vqshlq_n_s32(a, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> [[A:%.*]], i32 0, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqshlq_n_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_n(a, 0);
+#else /* POLYMORPHIC */
+    return vqshlq_n_u8(a, 0);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> [[A:%.*]], i32 13, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqshlq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_n(a, 13);
+#else /* POLYMORPHIC */
+    return vqshlq_n_u16(a, 13);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> [[A:%.*]], i32 6, i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vqshlq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_n(a, 6);
+#else /* POLYMORPHIC */
+    return vqshlq_n_u32(a, 6);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8> [[A:%.*]], i32 5)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vqshluq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vqshluq(a, 5);
+#else /* POLYMORPHIC */
+    return vqshluq_n_s8(a, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16> [[A:%.*]], i32 5)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vqshluq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vqshluq(a, 5);
+#else /* POLYMORPHIC */
+    return vqshluq_n_s16(a, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32> [[A:%.*]], i32 4)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vqshluq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vqshluq(a, 4);
+#else /* POLYMORPHIC */
+    return vqshluq_n_s32(a, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> [[A:%.*]], i32 4, i32 0)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+int8x16_t test_vrshrq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vrshrq(a, 4);
+#else /* POLYMORPHIC */
+    return vrshrq_n_s8(a, 4);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> [[A:%.*]], i32 12, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vrshrq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vrshrq(a, 12);
+#else /* POLYMORPHIC */
+    return vrshrq_n_s16(a, 12);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> [[A:%.*]], i32 30, i32 0)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vrshrq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vrshrq(a, 30);
+#else /* POLYMORPHIC */
+    return vrshrq_n_s32(a, 30);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> [[A:%.*]], i32 1, i32 1)
+// CHECK-NEXT:    ret <16 x i8> [[TMP0]]
+//
+uint8x16_t test_vrshrq_n_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vrshrq(a, 1);
+#else /* POLYMORPHIC */
+    return vrshrq_n_u8(a, 1);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> [[A:%.*]], i32 15, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vrshrq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vrshrq(a, 15);
+#else /* POLYMORPHIC */
+    return vrshrq_n_u16(a, 15);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> [[A:%.*]], i32 20, i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vrshrq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vrshrq(a, 20);
+#else /* POLYMORPHIC */
+    return vrshrq_n_u32(a, 20);
+#endif /* POLYMORPHIC */
+}
+
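vrshrq_n is a rounding shift right: conceptually it adds half of the final
low bit before shifting. A one-lane scalar model (again a sketch for this
note, not part of the patch):

    #include <stdint.h>

    /* Model of one lane of vrshrq_n_s8: shift right by sh (1..8), rounding
     * to nearest by adding 1 << (sh-1) before the arithmetic shift. */
    static int8_t model_vrshr_n_s8(int8_t x, unsigned sh)
    {
        int32_t rounded = (int32_t)x + (1 << (sh - 1));
        return (int8_t)(rounded >> sh);
    }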
+// CHECK-LABEL: @test_vqshlq_m_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vqshlq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_m_n(inactive, a, 6, p);
+#else /* POLYMORPHIC */
+    return vqshlq_m_n_s8(inactive, a, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 13, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vqshlq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_m_n(inactive, a, 13, p);
+#else /* POLYMORPHIC */
+    return vqshlq_m_n_s16(inactive, a, 13, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 14, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vqshlq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_m_n(inactive, a, 14, p);
+#else /* POLYMORPHIC */
+    return vqshlq_m_n_s32(inactive, a, 14, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_m_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqshlq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_m_n(inactive, a, 4, p);
+#else /* POLYMORPHIC */
+    return vqshlq_m_n_u8(inactive, a, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 9, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqshlq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_m_n(inactive, a, 9, p);
+#else /* POLYMORPHIC */
+    return vqshlq_m_n_u16(inactive, a, 9, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshlq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 25, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vqshlq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshlq_m_n(inactive, a, 25, p);
+#else /* POLYMORPHIC */
+    return vqshlq_m_n_u32(inactive, a, 25, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_m_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vqshluq_m_n_s8(uint8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshluq_m(inactive, a, 2, p);
+#else /* POLYMORPHIC */
+    return vqshluq_m_n_s8(inactive, a, 2, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 12, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vqshluq_m_n_s16(uint16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshluq_m(inactive, a, 12, p);
+#else /* POLYMORPHIC */
+    return vqshluq_m_n_s16(inactive, a, 12, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vqshluq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 24, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vqshluq_m_n_s32(uint32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vqshluq_m(inactive, a, 24, p);
+#else /* POLYMORPHIC */
+    return vqshluq_m_n_s32(inactive, a, 24, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrshrq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_m(inactive, a, 2, p);
+#else /* POLYMORPHIC */
+    return vrshrq_m_n_s8(inactive, a, 2, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 11, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrshrq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_m(inactive, a, 11, p);
+#else /* POLYMORPHIC */
+    return vrshrq_m_n_s16(inactive, a, 11, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 24, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vrshrq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_m(inactive, a, 24, p);
+#else /* POLYMORPHIC */
+    return vrshrq_m_n_s32(inactive, a, 24, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 7, i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrshrq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_m(inactive, a, 7, p);
+#else /* POLYMORPHIC */
+    return vrshrq_m_n_u8(inactive, a, 7, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 4, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrshrq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_m(inactive, a, 4, p);
+#else /* POLYMORPHIC */
+    return vrshrq_m_n_u16(inactive, a, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 27, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vrshrq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_m(inactive, a, 27, p);
+#else /* POLYMORPHIC */
+    return vrshrq_m_n_u32(inactive, a, 27, p);
+#endif /* POLYMORPHIC */
+}
+
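The _m ("merging") tests above all follow the same shape: the predicate in r0
is moved into p0 and the shift executes under a VPT block, so predicated-out
lanes keep the value of the inactive operand. The _x variants tested next
pass undef as the inactive value instead, leaving those lanes unspecified.
A per-lane model of the merging form (sketch only, not part of the patch):

    #include <stdbool.h>
    #include <stdint.h>

    /* Lane i of a merging predicated operation: take the computed result
     * where the predicate bit is set, otherwise the lane of 'inactive'. */
    static int8_t model_merge_lane(bool pred_bit, int8_t computed,
                                   int8_t inactive)
    {
        return pred_bit ? computed : inactive;
    }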
+// CHECK-LABEL: @test_vrshrq_x_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 3, i32 0, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+int8x16_t test_vrshrq_x_n_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_x(a, 3, p);
+#else /* POLYMORPHIC */
+    return vrshrq_x_n_s8(a, 3, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_x_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 12, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vrshrq_x_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_x(a, 12, p);
+#else /* POLYMORPHIC */
+    return vrshrq_x_n_s16(a, 12, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_x_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 20, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vrshrq_x_n_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_x(a, 20, p);
+#else /* POLYMORPHIC */
+    return vrshrq_x_n_s32(a, 20, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_x_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 1, i32 1, <16 x i1> [[TMP1]], <16 x i8> undef)
+// CHECK-NEXT:    ret <16 x i8> [[TMP2]]
+//
+uint8x16_t test_vrshrq_x_n_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_x(a, 1, p);
+#else /* POLYMORPHIC */
+    return vrshrq_x_n_u8(a, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_x_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 13, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vrshrq_x_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_x(a, 13, p);
+#else /* POLYMORPHIC */
+    return vrshrq_x_n_u16(a, 13, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vrshrq_x_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 6, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vrshrq_x_n_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vrshrq_x(a, 6, p);
+#else /* POLYMORPHIC */
+    return vrshrq_x_n_u32(a, 6, p);
+#endif /* POLYMORPHIC */
+}
+
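vshllbq/vshlltq widen as well as shift: the "b" form reads the bottom
(even-numbered) half-width lanes and the "t" form the top (odd-numbered)
ones, producing a vector of double-width lanes; the shift count may be as
large as the input lane width. One output lane, modelled in scalar code
(sketch only, not part of the patch):

    #include <stdint.h>

    /* Output lane i of vshllbq_n_s8 (top = 0) or vshlltq_n_s8 (top = 1):
     * sign-extend input lane 2*i + top to 16 bits, then shift left by
     * sh (1..8). No saturation is involved; the result always fits. */
    static int16_t model_vshll_n_s8(const int8_t in[16], unsigned i,
                                    int top, unsigned sh)
    {
        return (int16_t)(in[2 * i + top] * (1 << sh));
    }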
+// CHECK-LABEL: @test_vshllbq_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> [[A:%.*]], i32 2, i32 0, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vshllbq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vshllbq(a, 2);
+#else /* POLYMORPHIC */
+    return vshllbq_n_s8(a, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> [[A:%.*]], i32 13, i32 0, i32 0)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vshllbq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vshllbq(a, 13);
+#else /* POLYMORPHIC */
+    return vshllbq_n_s16(a, 13);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> [[A:%.*]], i32 5, i32 1, i32 0)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vshllbq_n_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vshllbq(a, 5);
+#else /* POLYMORPHIC */
+    return vshllbq_n_u8(a, 5);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> [[A:%.*]], i32 6, i32 1, i32 0)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vshllbq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vshllbq(a, 6);
+#else /* POLYMORPHIC */
+    return vshllbq_n_u16(a, 6);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> [[A:%.*]], i32 7, i32 0, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vshlltq_n_s8(int8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vshlltq(a, 7);
+#else /* POLYMORPHIC */
+    return vshlltq_n_s8(a, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> [[A:%.*]], i32 2, i32 0, i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vshlltq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vshlltq(a, 2);
+#else /* POLYMORPHIC */
+    return vshlltq_n_s16(a, 2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> [[A:%.*]], i32 7, i32 1, i32 1)
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vshlltq_n_u8(uint8x16_t a)
+{
+#ifdef POLYMORPHIC
+    return vshlltq(a, 7);
+#else /* POLYMORPHIC */
+    return vshlltq_n_u8(a, 7);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> [[A:%.*]], i32 14, i32 1, i32 1)
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vshlltq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vshlltq(a, 14);
+#else /* POLYMORPHIC */
+    return vshlltq_n_u16(a, 14);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_m_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, i32 0, i32 0, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshllbq_m_n_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshllbq_m(inactive, a, 6, p);
+#else /* POLYMORPHIC */
+    return vshllbq_m_n_s8(inactive, a, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, i32 0, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshllbq_m_n_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshllbq_m(inactive, a, 10, p);
+#else /* POLYMORPHIC */
+    return vshllbq_m_n_s16(inactive, a, 10, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_m_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 3, i32 1, i32 0, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshllbq_m_n_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshllbq_m(inactive, a, 3, p);
+#else /* POLYMORPHIC */
+    return vshllbq_m_n_u8(inactive, a, 3, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 14, i32 1, i32 0, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshllbq_m_n_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshllbq_m(inactive, a, 14, p);
+#else /* POLYMORPHIC */
+    return vshllbq_m_n_u16(inactive, a, 14, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_m_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 0, i32 1, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshlltq_m_n_s8(int16x8_t inactive, int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshlltq_m(inactive, a, 4, p);
+#else /* POLYMORPHIC */
+    return vshlltq_m_n_s8(inactive, a, 4, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 12, i32 0, i32 1, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshlltq_m_n_s16(int32x4_t inactive, int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshlltq_m(inactive, a, 12, p);
+#else /* POLYMORPHIC */
+    return vshlltq_m_n_s16(inactive, a, 12, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_m_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 1, i32 1, <16 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshlltq_m_n_u8(uint16x8_t inactive, uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshlltq_m(inactive, a, 2, p);
+#else /* POLYMORPHIC */
+    return vshlltq_m_n_u8(inactive, a, 2, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 9, i32 1, i32 1, <8 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshlltq_m_n_u16(uint32x4_t inactive, uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshlltq_m(inactive, a, 9, p);
+#else /* POLYMORPHIC */
+    return vshlltq_m_n_u16(inactive, a, 9, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_x_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 1, i32 0, i32 0, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshllbq_x_n_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshllbq_x(a, 1, p);
+#else /* POLYMORPHIC */
+    return vshllbq_x_n_s8(a, 1, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_x_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, i32 0, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshllbq_x_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshllbq_x(a, 10, p);
+#else /* POLYMORPHIC */
+    return vshllbq_x_n_s16(a, 10, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_x_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, i32 1, i32 0, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshllbq_x_n_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshllbq_x(a, 6, p);
+#else /* POLYMORPHIC */
+    return vshllbq_x_n_u8(a, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshllbq_x_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 1, i32 0, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshllbq_x_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshllbq_x(a, 10, p);
+#else /* POLYMORPHIC */
+    return vshllbq_x_n_u16(a, 10, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_x_n_s8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 0, i32 1, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vshlltq_x_n_s8(int8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshlltq_x(a, 2, p);
+#else /* POLYMORPHIC */
+    return vshlltq_x_n_s8(a, 2, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_x_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 6, i32 0, i32 1, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vshlltq_x_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshlltq_x(a, 6, p);
+#else /* POLYMORPHIC */
+    return vshlltq_x_n_s16(a, 6, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_x_n_u8(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 5, i32 1, i32 1, <16 x i1> [[TMP1]], <8 x i16> undef)
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vshlltq_x_n_u8(uint8x16_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshlltq_x(a, 5, p);
+#else /* POLYMORPHIC */
+    return vshlltq_x_n_u8(a, 5, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vshlltq_x_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 3, i32 1, i32 1, <8 x i1> [[TMP1]], <4 x i32> undef)
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vshlltq_x_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vshlltq_x(a, 3, p);
+#else /* POLYMORPHIC */
+    return vshlltq_x_n_u16(a, 3, p);
+#endif /* POLYMORPHIC */
+}
+
diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td
--- a/llvm/include/llvm/IR/IntrinsicsARM.td
+++ b/llvm/include/llvm/IR/IntrinsicsARM.td
@@ -876,10 +876,19 @@
                            [llvm_i32_ty, llvm_anyvector_ty], [IntrNoMem]>;
 
 multiclass MVEPredicated<list<LLVMType> rets, list<LLVMType> params,
-                         LLVMType pred, list<IntrinsicProperty> props = []> {
+                         LLVMType pred = llvm_anyvector_ty,
+                         list<IntrinsicProperty> props = []> {
   def "": Intrinsic<rets, params, props>;
   def _predicated: Intrinsic<rets, !listconcat(params, [pred]), props>;
 }
+multiclass MVEPredicatedM<list<LLVMType> rets, list<LLVMType> params,
+                          LLVMType pred = llvm_anyvector_ty,
+                          list<IntrinsicProperty> props = [IntrNoMem]> {
+  def "": Intrinsic<rets, params, props>;
+  def _predicated: Intrinsic<rets, !listconcat(params,
+      [pred, !if(!eq(!cast<string>(rets[0]), "llvm_anyvector_ty"),
+                 LLVMMatchType<0>, rets[0])]), props>;
+}
 
 defm int_arm_mve_vcvt_narrow: MVEPredicated<[llvm_v8f16_ty],
    [llvm_v8f16_ty, llvm_v4f32_ty, llvm_i32_ty], llvm_v4i1_ty, [IntrNoMem]>;
@@ -921,6 +930,16 @@
    llvm_anyvector_ty, LLVMMatchType<0>], [IntrNoMem]>;
 
+defm int_arm_mve_vqshl_imm: MVEPredicatedM<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
+defm int_arm_mve_vrshr_imm: MVEPredicatedM<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/]>;
+defm int_arm_mve_vqshlu_imm: MVEPredicatedM<[llvm_anyvector_ty],
+   [LLVMMatchType<0>, llvm_i32_ty /*shiftcount*/]>;
+defm int_arm_mve_vshll_imm: MVEPredicatedM<[llvm_anyvector_ty],
+   [llvm_anyvector_ty, llvm_i32_ty /*shiftcount*/, llvm_i32_ty /*unsigned*/,
+    llvm_i32_ty /*top-half*/]>;
+
 // MVE scalar shifts.
 class ARM_MVE_qrshift_single<list<LLVMType> value, list<LLVMType> saturate = []> :
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -158,6 +158,9 @@
                                  SDValue &OffReg, SDValue &ShImm);
   bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);
 
+  template <unsigned Min, unsigned Max>
+  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);
+
   inline bool is_so_imm(unsigned Imm) const {
     return ARM_AM::getSOImmVal(Imm) != -1;
   }
@@ -1382,6 +1385,16 @@
   return false;
 }
 
+template <unsigned Min, unsigned Max>
+bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
+  int Val;
+  if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
+    OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);
+    return true;
+  }
+  return false;
+}
+
 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                             SDValue &Base,
                                             SDValue &OffReg, SDValue &ShImm) {
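SelectImmediateInRange defers to isScaledConstantInRange, which tests a
half-open interval, so the template arguments name one past the largest
accepted value: <1,8> accepts 1-7 and <1,16> accepts 1-15 (the comments
added to ARMInstrInfo.td below make the same point). A scalar model of the
accepted range (sketch only, not part of the patch):

    #include <stdbool.h>

    /* Half-open interval test matching SelectImmediateInRange<Min,Max>:
     * accepts min <= val < max, e.g. <1,8> accepts 1..7. */
    static bool imm_in_half_open_range(int val, int min, int max)
    {
        return min <= val && val < max;
    }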
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -935,7 +935,10 @@
   // encodings allow.
   let DiagnosticString = "operand must be an immediate in the range [1,8]";
 }
-def mve_shift_imm1_7 : Operand<i32> {
+def mve_shift_imm1_7 : Operand<i32>,
+    // SelectImmediateInRange / isScaledConstantInRange uses a
+    // half-open interval, so the parameters <1,8> mean 1-7 inclusive
+    ComplexPattern<i32, 1, "SelectImmediateInRange<1,8>", [], []> {
   let ParserMatchClass = MVEShiftImm1_7AsmOperand;
   let EncoderMethod = "getMVEShiftImmOpValue";
 }
@@ -948,7 +951,10 @@
   // encodings allow.
   let DiagnosticString = "operand must be an immediate in the range [1,16]";
 }
-def mve_shift_imm1_15 : Operand<i32> {
+def mve_shift_imm1_15 : Operand<i32>,
+    // SelectImmediateInRange / isScaledConstantInRange uses a
+    // half-open interval, so the parameters <1,16> mean 1-15 inclusive
+    ComplexPattern<i32, 1, "SelectImmediateInRange<1,16>", [], []> {
  let ParserMatchClass = MVEShiftImm1_15AsmOperand;
  let EncoderMethod = "getMVEShiftImmOpValue";
 }
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2315,8 +2315,8 @@
 
 class MVE_VSHLL_imm<string iname, string suffix, bit U, bit th,
-                    dag immops, list<dag> pattern=[]>
-  : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$Qm), immops),
+                    Operand immtype, list<dag> pattern=[]>
+  : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm, immtype:$imm),
                   iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> {
   let Inst{28} = U;
   let Inst{25-23} = 0b101;
@@ -2325,6 +2325,9 @@
   let Inst{11-6} = 0b111101;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
+
+  // For the MVE_VSHLL_patterns multiclass to refer to
+  Operand immediateType = immtype;
 }
 
 // The immediate VSHLL instructions accept shift counts from 1 up to
@@ -2333,7 +2336,7 @@
 class MVE_VSHLL_imm8<string iname, string suffix,
                      bit U, bit th, list<dag> pattern=[]>
-  : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_7:$imm), pattern> {
+  : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_7, pattern> {
   bits<3> imm;
   let Inst{20-19} = 0b01;
   let Inst{18-16} = imm;
 }
@@ -2341,7 +2344,7 @@
 class MVE_VSHLL_imm16<string iname, string suffix,
                       bit U, bit th, list<dag> pattern=[]>
-  : MVE_VSHLL_imm<iname, suffix, U, th, (ins mve_shift_imm1_15:$imm), pattern> {
+  : MVE_VSHLL_imm<iname, suffix, U, th, mve_shift_imm1_15, pattern> {
   bits<4> imm;
   let Inst{20} = 0b1;
   let Inst{19-16} = imm;
 }
@@ -2385,6 +2388,45 @@
 defm MVE_VSHLL_lwu8  : MVE_VSHLL_lw<"vshll", "u8",  0b00, 0b1, "$Qd, $Qm, #8">;
 defm MVE_VSHLL_lwu16 : MVE_VSHLL_lw<"vshll", "u16", 0b01, 0b1, "$Qd, $Qm, #16">;
 
+multiclass MVE_VSHLL_patterns<MVEVectorVTInfo VTI, int top> {
+  // A succession of local variable definitions, via singleton
+  // foreach, to make the actual patterns legible
+  foreach suffix = [!strconcat(VTI.Suffix, !if(top, "th", "bh"))] in
+  foreach inst_imm = [!cast<MVE_VSHLL_imm>("MVE_VSHLL_imm" # suffix)] in
+  foreach inst_lw = [!cast<MVE_VSHLL_by_lane_width>("MVE_VSHLL_lw" # suffix)] in
+  foreach unpred_int = [int_arm_mve_vshll_imm] in
+  foreach pred_int = [int_arm_mve_vshll_imm_predicated] in
+  foreach imm = [inst_imm.immediateType] in {
+
+    def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), imm:$imm,
+                                      (i32 VTI.Unsigned), (i32 top))),
+              (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm))>;
+    def : Pat<(VTI.DblVec (unpred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
+                                      (i32 VTI.Unsigned), (i32 top))),
+              (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src)))>;
+
+    def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), imm:$imm,
+                                    (i32 VTI.Unsigned), (i32 top),
+                                    (VTI.Pred VCCR:$mask),
+                                    (VTI.DblVec MQPR:$inactive))),
+              (VTI.DblVec (inst_imm (VTI.Vec MQPR:$src), imm:$imm,
+                                    ARMVCCThen, (VTI.Pred VCCR:$mask),
+                                    (VTI.DblVec MQPR:$inactive)))>;
+    def : Pat<(VTI.DblVec (pred_int (VTI.Vec MQPR:$src), (i32 VTI.LaneBits),
+                                    (i32 VTI.Unsigned), (i32 top),
+                                    (VTI.Pred VCCR:$mask),
+                                    (VTI.DblVec MQPR:$inactive))),
+              (VTI.DblVec (inst_lw (VTI.Vec MQPR:$src), ARMVCCThen,
+                                   (VTI.Pred VCCR:$mask),
+                                   (VTI.DblVec MQPR:$inactive)))>;
+
+  }
+}
+
+foreach VTI = [MVE_v16s8, MVE_v8s16, MVE_v16u8, MVE_v8u16] in
+  foreach top = [0, 1] in
+    defm : MVE_VSHLL_patterns<VTI, top>;
+
 class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
                  dag immops, list<dag> pattern=[]>
   : MVE_shift_imm<(outs MQPR:$Qd), !con((ins MQPR:$QdSrc, MQPR:$Qm), immops),
@@ -2606,6 +2648,13 @@
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b0;
   let validForTailPredication = 1;
+
+  // For the MVE_shift_imm_patterns multiclass to refer to
+  MVEVectorVTInfo VTI;
+  Operand immediateType;
+  Intrinsic unpred_int;
+  Intrinsic pred_int;
+  dag unsignedFlag = (?);
 }
 
 class MVE_VSxI_imm<string iname, string suffix, bit bit_8, dag imm>
@@ -2645,50 +2694,49 @@
   let Inst{21} = 0b1;
 }
 
-class MVE_VQSHL_imm<string suffix, dag imm>
-  : MVE_shift_with_imm<"vqshl", suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VQSHL_imm<MVEVectorVTInfo VTI_, Operand immType>
+  : MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd),
+                       (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
                        vpred_r, ""> {
   bits<6> imm;
 
+  let Inst{28} = VTI_.Unsigned;
   let Inst{25-24} = 0b11;
   let Inst{21-16} = imm;
   let Inst{10-8} = 0b111;
-}
-
-def MVE_VQSHLimms8 : MVE_VQSHL_imm<"s8", (ins imm0_7:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-19} = 0b001;
-}
-
-def MVE_VQSHLimmu8 : MVE_VQSHL_imm<"u8", (ins imm0_7:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-19} = 0b001;
-}
-
-def MVE_VQSHLimms16 : MVE_VQSHL_imm<"s16", (ins imm0_15:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-20} = 0b01;
-}
-def MVE_VQSHLimmu16 : MVE_VQSHL_imm<"u16", (ins imm0_15:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-20} = 0b01;
-}
-
-def MVE_VQSHLimms32 : MVE_VQSHL_imm<"s32", (ins imm0_31:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21} = 0b1;
+  let VTI = VTI_;
+  let immediateType = immType;
+  let unsignedFlag = (? (i32 VTI.Unsigned));
 }
 
-def MVE_VQSHLimmu32 : MVE_VQSHL_imm<"u32", (ins imm0_31:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21} = 0b1;
+let unpred_int = int_arm_mve_vqshl_imm,
+    pred_int = int_arm_mve_vqshl_imm_predicated in {
+  def MVE_VQSHLimms8 : MVE_VQSHL_imm<MVE_v16s8, imm0_7> {
+    let Inst{21-19} = 0b001;
+  }
+  def MVE_VQSHLimmu8 : MVE_VQSHL_imm<MVE_v16u8, imm0_7> {
+    let Inst{21-19} = 0b001;
+  }
+
+  def MVE_VQSHLimms16 : MVE_VQSHL_imm<MVE_v8s16, imm0_15> {
+    let Inst{21-20} = 0b01;
+  }
+  def MVE_VQSHLimmu16 : MVE_VQSHL_imm<MVE_v8u16, imm0_15> {
+    let Inst{21-20} = 0b01;
+  }
+
+  def MVE_VQSHLimms32 : MVE_VQSHL_imm<MVE_v4s32, imm0_31> {
+    let Inst{21} = 0b1;
+  }
+  def MVE_VQSHLimmu32 : MVE_VQSHL_imm<MVE_v4u32, imm0_31> {
+    let Inst{21} = 0b1;
+  }
 }
 
-class MVE_VQSHLU_imm<string suffix, dag imm>
-  : MVE_shift_with_imm<"vqshlu", suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VQSHLU_imm<MVEVectorVTInfo VTI_, Operand immType>
+  : MVE_shift_with_imm<"vqshlu", VTI_.Suffix, (outs MQPR:$Qd),
+                       (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
                        vpred_r, ""> {
   bits<6> imm;
@@ -2696,61 +2744,103 @@
   let Inst{25-24} = 0b11;
   let Inst{21-16} = imm;
   let Inst{10-8} = 0b110;
-}
 
-def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<"s8", (ins imm0_7:$imm)> {
-  let Inst{21-19} = 0b001;
+  let VTI = VTI_;
+  let immediateType = immType;
 }
 
-def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<"s16", (ins imm0_15:$imm)> {
-  let Inst{21-20} = 0b01;
-}
+let unpred_int = int_arm_mve_vqshlu_imm,
+    pred_int = int_arm_mve_vqshlu_imm_predicated in {
+  def MVE_VQSHLU_imms8 : MVE_VQSHLU_imm<MVE_v16s8, imm0_7> {
+    let Inst{21-19} = 0b001;
+  }
 
-def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<"s32", (ins imm0_31:$imm)> {
-  let Inst{21} = 0b1;
+  def MVE_VQSHLU_imms16 : MVE_VQSHLU_imm<MVE_v8s16, imm0_15> {
+    let Inst{21-20} = 0b01;
+  }
+
+  def MVE_VQSHLU_imms32 : MVE_VQSHLU_imm<MVE_v4s32, imm0_31> {
+    let Inst{21} = 0b1;
+  }
 }
 
-class MVE_VRSHR_imm<string suffix, dag imm>
-  : MVE_shift_with_imm<"vrshr", suffix, (outs MQPR:$Qd),
-                       !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
+class MVE_VRSHR_imm<MVEVectorVTInfo VTI_, Operand immType>
+  : MVE_shift_with_imm<"vrshr", VTI_.Suffix, (outs MQPR:$Qd),
+                       (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm",
                        vpred_r, ""> {
   bits<6> imm;
 
+  let Inst{28} = VTI_.Unsigned;
   let Inst{25-24} = 0b11;
   let Inst{21-16} = imm;
   let Inst{10-8} = 0b010;
-}
 
-def MVE_VRSHR_imms8 : MVE_VRSHR_imm<"s8", (ins shr_imm8:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-19} = 0b001;
+  let VTI = VTI_;
+  let immediateType = immType;
+  let unsignedFlag = (? (i32 VTI.Unsigned));
 }
 
-def MVE_VRSHR_immu8 : MVE_VRSHR_imm<"u8", (ins shr_imm8:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-19} = 0b001;
-}
+let unpred_int = int_arm_mve_vrshr_imm,
+    pred_int = int_arm_mve_vrshr_imm_predicated in {
+  def MVE_VRSHR_imms8 : MVE_VRSHR_imm<MVE_v16s8, shr_imm8> {
+    let Inst{21-19} = 0b001;
+  }
 
-def MVE_VRSHR_imms16 : MVE_VRSHR_imm<"s16", (ins shr_imm16:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21-20} = 0b01;
-}
+  def MVE_VRSHR_immu8 : MVE_VRSHR_imm<MVE_v16u8, shr_imm8> {
+    let Inst{21-19} = 0b001;
+  }
 
-def MVE_VRSHR_immu16 : MVE_VRSHR_imm<"u16", (ins shr_imm16:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21-20} = 0b01;
-}
+  def MVE_VRSHR_imms16 : MVE_VRSHR_imm<MVE_v8s16, shr_imm16> {
+    let Inst{21-20} = 0b01;
+  }
 
-def MVE_VRSHR_imms32 : MVE_VRSHR_imm<"s32", (ins shr_imm32:$imm)> {
-  let Inst{28} = 0b0;
-  let Inst{21} = 0b1;
-}
+  def MVE_VRSHR_immu16 : MVE_VRSHR_imm<MVE_v8u16, shr_imm16> {
+    let Inst{21-20} = 0b01;
+  }
 
-def MVE_VRSHR_immu32 : MVE_VRSHR_imm<"u32", (ins shr_imm32:$imm)> {
-  let Inst{28} = 0b1;
-  let Inst{21} = 0b1;
+  def MVE_VRSHR_imms32 : MVE_VRSHR_imm<MVE_v4s32, shr_imm32> {
+    let Inst{21} = 0b1;
+  }
+
+  def MVE_VRSHR_immu32 : MVE_VRSHR_imm<MVE_v4u32, shr_imm32> {
+    let Inst{21} = 0b1;
+  }
 }
 
+multiclass MVE_shift_imm_patterns<MVE_shift_with_imm inst> {
+  def : Pat<(inst.VTI.Vec !con((inst.unpred_int (inst.VTI.Vec MQPR:$src),
+                                inst.immediateType:$imm),
+                               inst.unsignedFlag)),
+            (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
+                                inst.immediateType:$imm))>;
+
+  def : Pat<(inst.VTI.Vec !con((inst.pred_int (inst.VTI.Vec MQPR:$src),
+                                inst.immediateType:$imm),
+                               inst.unsignedFlag,
+                               (? (inst.VTI.Pred VCCR:$mask),
+                                  (inst.VTI.Vec MQPR:$inactive)))),
+            (inst.VTI.Vec (inst (inst.VTI.Vec MQPR:$src),
+                                inst.immediateType:$imm,
+                                ARMVCCThen, (inst.VTI.Pred VCCR:$mask),
+                                (inst.VTI.Vec MQPR:$inactive)))>;
+}
+
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimms32>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLimmu32>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms8>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms16>;
+defm : MVE_shift_imm_patterns<MVE_VQSHLU_imms32>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms8>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu8>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms16>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu16>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_imms32>;
+defm : MVE_shift_imm_patterns<MVE_VRSHR_immu32>;
+
 class MVE_VSHR_imm<string suffix, dag imm>
   : MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd),
                        !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm",
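Taken together, the clang builtins lower to the new IR intrinsics and the
patterns above select them back into single MVE instructions, as the .ll
tests below verify. A small end-to-end usage sketch (hypothetical code
written for this note, not part of the patch; with the patch applied, each
call should become one instruction):

    #include <arm_mve.h>

    uint16x8_t demo(int8x16_t a, uint16x8_t fallback, mve_pred16_t p)
    {
        int8x16_t r = vrshrq_n_s8(a, 4);    /* vrshr.s8  q, q, #4 */
        uint8x16_t u = vqshluq_n_s8(r, 2);  /* vqshlu.s8 q, q, #2 */
        /* widen the bottom half-lanes; predicated-out lanes come from
         * 'fallback' (vshllbt.u8 under a VPT block) */
        return vshllbq_m_n_u8(fallback, u, 3, p);
    }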
diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
--- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll
@@ -385,6 +385,1058 @@
   ret <4 x i32> %2
 }
 
+define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_n_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vqshlq_n_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqshl.s8 q0, q0, #3
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> %a, i32 3, i32 0)
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_n_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vqshlq_n_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqshl.s16 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> %a, i32 4, i32 0)
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_n_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vqshlq_n_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqshl.s32 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %a, i32 4, i32 0)
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_n_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vqshlq_n_u8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqshl.u8 q0, q0, #0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8> %a, i32 0, i32 1)
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_n_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vqshlq_n_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqshl.u16 q0, q0, #13
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16> %a, i32 13, i32 1)
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_n_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vqshlq_n_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqshl.u32 q0, q0, #6
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32> %a, i32 6, i32 1)
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqshluq_n_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vqshluq_n_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqshlu.s8 q0, q0, #5
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8> %a, i32 5)
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshluq_n_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vqshluq_n_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqshlu.s16 q0, q0, #5
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16> %a, i32 5)
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshluq_n_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vqshluq_n_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vqshlu.s32 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32> %a, i32 4)
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_n_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vrshrq_n_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrshr.s8 q0, q0, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> %a, i32 4, i32 0)
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_n_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vrshrq_n_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrshr.s16 q0, q0, #12
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> %a, i32 12, i32 0)
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_n_s32(<4 x i32> %a) {
+; CHECK-LABEL: test_vrshrq_n_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrshr.s32 q0, q0, #30
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> %a, i32 30, i32 0)
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_n_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vrshrq_n_u8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrshr.u8 q0, q0, #1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8> %a, i32 1, i32 1)
+  ret <16 x i8> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_n_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vrshrq_n_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrshr.u16 q0, q0, #15
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16> %a, i32 15, i32 1)
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_n_u32(<4 x i32> %a) {
+; CHECK-LABEL: test_vrshrq_n_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vrshr.u32 q0, q0, #20
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32> %a, i32 20, i32 1)
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqshlt.s8 q0, q1, #6
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, i32 0, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqshlt.s16 q0, q1, #13
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqshlt.s32 q0, q1, #14
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 14, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqshlq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_u8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqshlt.u8 q0, q1, #4
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshlq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqshlt.u16 q0, q1, #9
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 9, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshlq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshlq_m_n_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqshlt.u32 q0, q1, #25
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 25, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vqshluq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshluq_m_n_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqshlut.s8 q0, q1, #2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vqshluq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshluq_m_n_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqshlut.s16 q0, q1, #12
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 12, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vqshluq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vqshluq_m_n_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vqshlut.s32 q0, q1, #24
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 24, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_s8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrshrt.s8 q0, q1, #2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_s16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrshrt.s16 q0, q1, #11
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 11, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_s32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrshrt.s32 q0, q1, #24
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 24, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+  ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_u8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vrshrt.u8 q0, q1, #7
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+  %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> %inactive)
+  ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_u16(<8 x i16>
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s8 q0, q1, #2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s16 q0, q1, #11
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 11, i32 0, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s32 q0, q1, #24
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 24, i32 0, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u8 q0, q1, #7
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> %inactive)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u16 q0, q1, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 4, i32 1, <8 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_m_n_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u32 q0, q1, #27
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 27, i32 1, <4 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
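+; The _x (don't-care) forms pass undef as the inactive operand instead of
+; merging into an existing register.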
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s8 q0, q0, #3
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 3, i32 0, <16 x i1> %1, <16 x i8> undef)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s16 q0, q0, #12
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 12, i32 0, <8 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_s32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.s32 q0, q0, #20
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 20, i32 0, <4 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <16 x i8> @test_vrshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u8 q0, q0, #1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, i32 1, <16 x i1> %1, <16 x i8> undef)
+ ret <16 x i8> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vrshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u16 q0, q0, #13
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, i32 1, <8 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vrshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vrshrq_x_n_u32:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vrshrt.u32 q0, q0, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
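+; VSHLLB/VSHLLT widen the bottom/top halves of the lanes to double width; a
+; shift count equal to the source lane width is also legal, which the
+; _lanewidth tests exercise.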
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vshllbq_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.s8 q0, q0, #2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 2, i32 0, i32 0)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_s8_lanewidth(<16 x i8> %a) {
+; CHECK-LABEL: test_vshllbq_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.s8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 0, i32 0)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vshllbq_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.s16 q0, q0, #13
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 13, i32 0, i32 0)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_s16_lanewidth(<8 x i16> %a) {
+; CHECK-LABEL: test_vshllbq_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.s16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 0, i32 0)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vshllbq_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.u8 q0, q0, #5
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 5, i32 1, i32 0)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_n_u8_lanewidth(<16 x i8> %a) {
+; CHECK-LABEL: test_vshllbq_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.u8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 1, i32 0)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vshllbq_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.u16 q0, q0, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 6, i32 1, i32 0)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_n_u16_lanewidth(<8 x i16> %a) {
+; CHECK-LABEL: test_vshllbq_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllb.u16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 1, i32 0)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_s8(<16 x i8> %a) {
+; CHECK-LABEL: test_vshlltq_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.s8 q0, q0, #7
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 7, i32 0, i32 1)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_s8_lanewidth(<16 x i8> %a) {
+; CHECK-LABEL: test_vshlltq_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.s8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 0, i32 1)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_s16(<8 x i16> %a) {
+; CHECK-LABEL: test_vshlltq_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.s16 q0, q0, #2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 2, i32 0, i32 1)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_s16_lanewidth(<8 x i16> %a) {
+; CHECK-LABEL: test_vshlltq_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.s16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 0, i32 1)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_u8(<16 x i8> %a) {
+; CHECK-LABEL: test_vshlltq_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.u8 q0, q0, #7
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 7, i32 1, i32 1)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_n_u8_lanewidth(<16 x i8> %a) {
+; CHECK-LABEL: test_vshlltq_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.u8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8> %a, i32 8, i32 1, i32 1)
+ ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_u16(<8 x i16> %a) {
+; CHECK-LABEL: test_vshlltq_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.u16 q0, q0, #14
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 14, i32 1, i32 1)
+ ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_n_u16_lanewidth(<8 x i16> %a) {
+; CHECK-LABEL: test_vshlltq_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vshllt.u16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16> %a, i32 16, i32 1, i32 1)
+ ret <4 x i32> %0
+}
+
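+; Predicated widening shifts: the predicate is sized for the narrower source
+; vector, e.g. <16 x i1> for an i8 source even though the result is <8 x i16>.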
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s8 q0, q1, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 6, i32 0, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_s8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s8 q0, q1, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s16 q0, q1, #10
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_s16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s16 q0, q1, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u8 q0, q1, #3
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 3, i32 1, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_m_n_u8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u8 q0, q1, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 0, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u16 q0, q1, #14
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 14, i32 1, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_m_n_u16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_m_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u16 q0, q1, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 0, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_s8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s8 q0, q1, #4
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 4, i32 0, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_s8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s8 q0, q1, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_s16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s16 q0, q1, #12
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 12, i32 0, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_s16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s16 q0, q1, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_u8(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u8 q0, q1, #2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 2, i32 1, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_m_n_u8_lanewidth(<8 x i16> %inactive, <16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u8 q0, q1, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 1, <16 x i1> %1, <8 x i16> %inactive)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_u16(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u16 q0, q1, #9
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 9, i32 1, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_m_n_u16_lanewidth(<4 x i32> %inactive, <8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_m_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u16 q0, q1, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 1, <8 x i1> %1, <4 x i32> %inactive)
+ ret <4 x i32> %2
+}
+
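+; _x variants of the widening shifts, again with undef as the inactive value.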
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s8 q0, q0, #1
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 1, i32 0, i32 0, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_s8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 0, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s16 q0, q0, #10
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, i32 0, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_s16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.s16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 0, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u8 q0, q0, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 6, i32 1, i32 0, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshllbq_x_n_u8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 0, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u16 q0, q0, #10
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 10, i32 1, i32 0, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshllbq_x_n_u16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshllbq_x_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshllbt.u16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 0, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_s8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_s8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s8 q0, q0, #2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, i32 1, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_s8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_s8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 0, i32 1, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_s16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_s16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s16 q0, q0, #6
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 6, i32 0, i32 1, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_s16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_s16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.s16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 0, i32 1, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_u8(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_u8:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u8 q0, q0, #5
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 5, i32 1, i32 1, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vshlltq_x_n_u8_lanewidth(<16 x i8> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_u8_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u8 q0, q0, #8
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0)
+ %2 = tail call <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8> %a, i32 8, i32 1, i32 1, <16 x i1> %1, <8 x i16> undef)
+ ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_u16(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_u16:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u16 q0, q0, #3
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 3, i32 1, i32 1, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vshlltq_x_n_u16_lanewidth(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vshlltq_x_n_u16_lanewidth:
+; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: vmsr p0, r0
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vshlltt.u16 q0, q0, #16
+; CHECK-NEXT: bx lr
+entry:
+ %0 = zext i16 %p to i32
+ %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+ %2 = tail call <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16> %a, i32 16, i32 1, i32 1, <8 x i1> %1, <4 x i32> undef)
+ ret <4 x i32> %2
+}
+
 declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32)
 declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
 declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
@@ -396,3 +1448,29 @@
 declare <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
 declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
 declare <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)
+
+declare <16 x i8> @llvm.arm.mve.vqshl.imm.v16i8(<16 x i8>, i32, i32)
+declare <8 x i16> @llvm.arm.mve.vqshl.imm.v8i16(<8 x i16>, i32, i32)
+declare <4 x i32> @llvm.arm.mve.vqshl.imm.v4i32(<4 x i32>, i32, i32)
+declare <16 x i8> @llvm.arm.mve.vqshl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.vqshl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vqshl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)
+
+declare <16 x i8> @llvm.arm.mve.vqshlu.imm.v16i8(<16 x i8>, i32)
+declare <8 x i16> @llvm.arm.mve.vqshlu.imm.v8i16(<8 x i16>, i32)
+declare <4 x i32> @llvm.arm.mve.vqshlu.imm.v4i32(<4 x i32>, i32)
+declare <16 x i8> @llvm.arm.mve.vqshlu.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.vqshlu.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vqshlu.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>)
+
+declare <16 x i8> @llvm.arm.mve.vrshr.imm.v16i8(<16 x i8>, i32, i32)
+declare <8 x i16> @llvm.arm.mve.vrshr.imm.v8i16(<8 x i16>, i32, i32)
+declare <4 x i32> @llvm.arm.mve.vrshr.imm.v4i32(<4 x i32>, i32, i32)
+declare <16 x i8> @llvm.arm.mve.vrshr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>)
+declare <8 x i16> @llvm.arm.mve.vrshr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vrshr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)
+
+declare <8 x i16> @llvm.arm.mve.vshll.imm.v8i16.v16i8(<16 x i8>, i32, i32, i32)
+declare <4 x i32> @llvm.arm.mve.vshll.imm.v4i32.v8i16(<8 x i16>, i32, i32, i32)
+declare <8 x i16> @llvm.arm.mve.vshll.imm.predicated.v8i16.v16i8.v16i1(<16 x i8>, i32, i32, i32, <16 x i1>, <8 x i16>)
+declare <4 x i32> @llvm.arm.mve.vshll.imm.predicated.v4i32.v8i16.v8i1(<8 x i16>, i32, i32, i32, <8 x i1>, <4 x i32>)