diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td --- a/clang/include/clang/Basic/arm_mve.td +++ b/clang/include/clang/Basic/arm_mve.td @@ -609,6 +609,33 @@ defm vstrwq: scatter_offset_both; defm vstrdq: scatter_offset_both; +multiclass PredicatedImmediateVectorShift< + Immediate immtype, string predIntrName, list unsignedFlag = []> { + foreach predIntr = [IRInt] in { + def _m_n: Intrinsic; + def _x_n: Intrinsic; + } +} + +let params = T.Int in { + def vshlq_n: Intrinsic; + defm vshlq: PredicatedImmediateVectorShift; + + let pnt = PNT_NType in { + def vshrq_n: Intrinsic; + defm vshrq: PredicatedImmediateVectorShift; + } +} + // Base class for the scalar shift intrinsics. class ScalarShift: Intrinsic { diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td --- a/clang/include/clang/Basic/arm_mve_defs.td +++ b/clang/include/clang/Basic/arm_mve_defs.td @@ -66,6 +66,10 @@ def sub: IRBuilder<"CreateSub">; def shl: IRBuilder<"CreateShl">; def lshr: IRBuilder<"CreateLShr">; +def immshr: CGHelperFn<"MVEImmediateShr"> { + let special_params = [IRBuilderIntParam<1, "unsigned">, + IRBuilderIntParam<2, "bool">]; +} def fadd: IRBuilder<"CreateFAdd">; def fmul: IRBuilder<"CreateFMul">; def fsub: IRBuilder<"CreateFSub">; @@ -318,8 +322,8 @@ // // imm_0toNm1 is the same but with the range offset by 1, i.e. 0 to N-1 // inclusive. -def imm_1toN : Immediate>; -def imm_0toNm1 : Immediate>; +def imm_1toN : Immediate>; +def imm_0toNm1 : Immediate>; // imm_lane has to be the index of a vector lane in the main vector type, i.e // it can range from 0 to (128 / size of scalar)-1 inclusive. (e.g. vgetq_lane) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6802,6 +6802,15 @@ } } +template +static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { + llvm::APSInt IntVal; + bool IsConst = E->isIntegerConstantExpr(IntVal, Context); + assert(IsConst && "Sema should have checked this was a constant"); + (void)IsConst; + return IntVal.getExtValue(); +} + static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, llvm::Type *T, bool Unsigned) { // Helper function called by Tablegen-constructed ARM MVE builtin codegen, @@ -6809,6 +6818,27 @@ return Unsigned ? Builder.CreateZExt(V, T) : Builder.CreateSExt(V, T); } +static llvm::Value *MVEImmediateShr(CGBuilderTy &Builder, llvm::Value *V, + uint32_t Shift, bool Unsigned) { + // MVE helper function for integer shift right. This must handle signed vs + // unsigned, and also deal specially with the case where the shift count is + // equal to the lane size. In LLVM IR, an LShr with that parameter would be + // undefined behavior, but in MVE it's legal, so we must convert it to code + // that is not undefined in IR. + unsigned LaneBits = + V->getType()->getVectorElementType()->getPrimitiveSizeInBits(); + if (Shift == LaneBits) { + // An unsigned shift of the full lane size always generates zero, so we can + // simply emit a zero vector. A signed shift of the full lane size does the + // same thing as shifting by one bit fewer. + if (Unsigned) + return llvm::Constant::getNullValue(V->getType()); + else + --Shift; + } + return Unsigned ? Builder.CreateLShr(V, Shift) : Builder.CreateAShr(V, Shift); +} + static llvm::Value *ARMMVEVectorSplat(CGBuilderTy &Builder, llvm::Value *V) { // MVE-specific helper function for a vector splat, which infers the element // count of the output vector by knowing that MVE vectors are all 128 bits diff --git a/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/arm-mve-intrinsics/vector-shift-imm.c @@ -0,0 +1,722 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s +// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s + +#include + +// CHECK-LABEL: @test_vshlq_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +int8x16_t test_vshlq_n_s8(int8x16_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 5); +#else /* POLYMORPHIC */ + return vshlq_n_s8(a, 5); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vshlq_n_s16(int16x8_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 5); +#else /* POLYMORPHIC */ + return vshlq_n_s16(a, 5); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vshlq_n_s32(int32x4_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 18); +#else /* POLYMORPHIC */ + return vshlq_n_s32(a, 18); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_s8_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +int8x16_t test_vshlq_n_s8_trivial(int8x16_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_s8(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_s16_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vshlq_n_s16_trivial(int16x8_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_s16(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_s32_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vshlq_n_s32_trivial(int32x4_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_s32(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +uint8x16_t test_vshlq_n_u8(uint8x16_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 3); +#else /* POLYMORPHIC */ + return vshlq_n_u8(a, 3); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vshlq_n_u16(uint16x8_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 11); +#else /* POLYMORPHIC */ + return vshlq_n_u16(a, 11); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vshlq_n_u32(uint32x4_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 7); +#else /* POLYMORPHIC */ + return vshlq_n_u32(a, 7); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u8_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i8> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +uint8x16_t test_vshlq_n_u8_trivial(uint8x16_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_u8(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u16_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <8 x i16> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vshlq_n_u16_trivial(uint16x8_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_u16(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_n_u32_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[A:%.*]], zeroinitializer +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vshlq_n_u32_trivial(uint32x4_t a) +{ +#ifdef POLYMORPHIC + return vshlq_n(a, 0); +#else /* POLYMORPHIC */ + return vshlq_n_u32(a, 0); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +int8x16_t test_vshrq_n_s8(int8x16_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 4); +#else /* POLYMORPHIC */ + return vshrq_n_s8(a, 4); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vshrq_n_s16(int16x8_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 10); +#else /* POLYMORPHIC */ + return vshrq_n_s16(a, 10); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vshrq_n_s32(int32x4_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 19); +#else /* POLYMORPHIC */ + return vshrq_n_s32(a, 19); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s8_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <16 x i8> [[A:%.*]], +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +int8x16_t test_vshrq_n_s8_trivial(int8x16_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 8); +#else /* POLYMORPHIC */ + return vshrq_n_s8(a, 8); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s16_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <8 x i16> [[A:%.*]], +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +int16x8_t test_vshrq_n_s16_trivial(int16x8_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 16); +#else /* POLYMORPHIC */ + return vshrq_n_s16(a, 16); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_s32_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[A:%.*]], +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +int32x4_t test_vshrq_n_s32_trivial(int32x4_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 32); +#else /* POLYMORPHIC */ + return vshrq_n_s32(a, 32); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = lshr <16 x i8> [[A:%.*]], +// CHECK-NEXT: ret <16 x i8> [[TMP0]] +// +uint8x16_t test_vshrq_n_u8(uint8x16_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 1); +#else /* POLYMORPHIC */ + return vshrq_n_u8(a, 1); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = lshr <8 x i16> [[A:%.*]], +// CHECK-NEXT: ret <8 x i16> [[TMP0]] +// +uint16x8_t test_vshrq_n_u16(uint16x8_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 10); +#else /* POLYMORPHIC */ + return vshrq_n_u16(a, 10); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[A:%.*]], +// CHECK-NEXT: ret <4 x i32> [[TMP0]] +// +uint32x4_t test_vshrq_n_u32(uint32x4_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 10); +#else /* POLYMORPHIC */ + return vshrq_n_u32(a, 10); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u8_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret <16 x i8> zeroinitializer +// +uint8x16_t test_vshrq_n_u8_trivial(uint8x16_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 8); +#else /* POLYMORPHIC */ + return vshrq_n_u8(a, 8); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u16_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret <8 x i16> zeroinitializer +// +uint16x8_t test_vshrq_n_u16_trivial(uint16x8_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 16); +#else /* POLYMORPHIC */ + return vshrq_n_u16(a, 16); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_n_u32_trivial( +// CHECK-NEXT: entry: +// CHECK-NEXT: ret <4 x i32> zeroinitializer +// +uint32x4_t test_vshrq_n_u32_trivial(uint32x4_t a) +{ +#ifdef POLYMORPHIC + return vshrq(a, 32); +#else /* POLYMORPHIC */ + return vshrq_n_u32(a, 32); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 6, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vshlq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_m_n(inactive, a, 6, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_s8(inactive, a, 6, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 13, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vshlq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_m_n(inactive, a, 13, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_s16(inactive, a, 13, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vshlq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_m_n(inactive, a, 0, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_s32(inactive, a, 0, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 3, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vshlq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_m_n(inactive, a, 3, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_u8(inactive, a, 3, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vshlq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_m_n(inactive, a, 1, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_u16(inactive, a, 1, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_m_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 24, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vshlq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_m_n(inactive, a, 24, p); +#else /* POLYMORPHIC */ + return vshlq_m_n_u32(inactive, a, 24, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 2, i32 0, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vshrq_m_n_s8(int8x16_t inactive, int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, a, 2, p); +#else /* POLYMORPHIC */ + return vshrq_m_n_s8(inactive, a, 2, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 3, i32 0, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vshrq_m_n_s16(int16x8_t inactive, int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, a, 3, p); +#else /* POLYMORPHIC */ + return vshrq_m_n_s16(inactive, a, 3, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, i32 0, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vshrq_m_n_s32(int32x4_t inactive, int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, a, 13, p); +#else /* POLYMORPHIC */ + return vshrq_m_n_s32(inactive, a, 13, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 1, <16 x i1> [[TMP1]], <16 x i8> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vshrq_m_n_u8(uint8x16_t inactive, uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, a, 4, p); +#else /* POLYMORPHIC */ + return vshrq_m_n_u8(inactive, a, 4, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 14, i32 1, <8 x i1> [[TMP1]], <8 x i16> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vshrq_m_n_u16(uint16x8_t inactive, uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, a, 14, p); +#else /* POLYMORPHIC */ + return vshrq_m_n_u16(inactive, a, 14, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_m_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 21, i32 1, <4 x i1> [[TMP1]], <4 x i32> [[INACTIVE:%.*]]) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vshrq_m_n_u32(uint32x4_t inactive, uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_m(inactive, a, 21, p); +#else /* POLYMORPHIC */ + return vshrq_m_n_u32(inactive, a, 21, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vshlq_x_n_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 1, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_s8(a, 1, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 15, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vshlq_x_n_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 15, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_s16(a, 15, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 13, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vshlq_x_n_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 13, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_s32(a, 13, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vshlq_x_n_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 4, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_u8(a, 4, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vshlq_x_n_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 10, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_u16(a, 10, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshlq_x_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 30, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vshlq_x_n_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshlq_x_n(a, 30, p); +#else /* POLYMORPHIC */ + return vshlq_x_n_u32(a, 30, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_s8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 4, i32 0, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +int8x16_t test_vshrq_x_n_s8(int8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 4, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_s8(a, 4, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_s16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 10, i32 0, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +int16x8_t test_vshrq_x_n_s16(int16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 10, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_s16(a, 10, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_s32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 7, i32 0, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +int32x4_t test_vshrq_x_n_s32(int32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 7, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_s32(a, 7, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_u8( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> [[A:%.*]], i32 7, i32 1, <16 x i1> [[TMP1]], <16 x i8> undef) +// CHECK-NEXT: ret <16 x i8> [[TMP2]] +// +uint8x16_t test_vshrq_x_n_u8(uint8x16_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 7, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_u8(a, 7, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_u16( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> [[A:%.*]], i32 7, i32 1, <8 x i1> [[TMP1]], <8 x i16> undef) +// CHECK-NEXT: ret <8 x i16> [[TMP2]] +// +uint16x8_t test_vshrq_x_n_u16(uint16x8_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 7, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_u16(a, 7, p); +#endif /* POLYMORPHIC */ +} + +// CHECK-LABEL: @test_vshrq_x_n_u32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32 +// CHECK-NEXT: [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]]) +// CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> [[A:%.*]], i32 6, i32 1, <4 x i1> [[TMP1]], <4 x i32> undef) +// CHECK-NEXT: ret <4 x i32> [[TMP2]] +// +uint32x4_t test_vshrq_x_n_u32(uint32x4_t a, mve_pred16_t p) +{ +#ifdef POLYMORPHIC + return vshrq_x(a, 6, p); +#else /* POLYMORPHIC */ + return vshrq_x_n_u32(a, 6, p); +#endif /* POLYMORPHIC */ +} diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp --- a/clang/utils/TableGen/MveEmitter.cpp +++ b/clang/utils/TableGen/MveEmitter.cpp @@ -470,6 +470,10 @@ virtual void genCode(raw_ostream &OS, CodeGenParamAllocator &) const = 0; virtual bool hasIntegerConstantValue() const { return false; } virtual uint32_t integerConstantValue() const { return 0; } + virtual bool hasIntegerValue() const { return false; } + virtual std::string getIntegerValue(const std::string &) { + llvm_unreachable("non-working Result::getIntegerValue called"); + } virtual std::string typeName() const { return "Value *"; } // Mostly, when a code-generation operation has a dependency on prior @@ -544,8 +548,9 @@ public: unsigned ArgNum; bool AddressType; - BuiltinArgResult(unsigned ArgNum, bool AddressType) - : ArgNum(ArgNum), AddressType(AddressType) {} + bool Immediate; + BuiltinArgResult(unsigned ArgNum, bool AddressType, bool Immediate) + : ArgNum(ArgNum), AddressType(AddressType), Immediate(Immediate) {} void genCode(raw_ostream &OS, CodeGenParamAllocator &) const override { OS << (AddressType ? "EmitPointerWithAlignment" : "EmitScalarExpr") << "(E->getArg(" << ArgNum << "))"; @@ -559,6 +564,11 @@ return "(" + varname() + ".getPointer())"; return Result::asValue(); } + bool hasIntegerValue() const override { return Immediate; } + std::string getIntegerValue(const std::string &IntType) override { + return "GetIntegerConstantValue<" + IntType + ">(E->getArg(" + + utostr(ArgNum) + "), getContext())"; + } }; // Result subclass for an integer literal appearing in Tablegen. This may need @@ -633,27 +643,34 @@ StringRef CallPrefix; std::vector Args; std::set AddressArgs; - std::map IntConstantArgs; + std::map IntegerArgs; IRBuilderResult(StringRef CallPrefix, std::vector Args, std::set AddressArgs, - std::map IntConstantArgs) - : CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs), - IntConstantArgs(IntConstantArgs) {} + std::map IntegerArgs) + : CallPrefix(CallPrefix), Args(Args), AddressArgs(AddressArgs), + IntegerArgs(IntegerArgs) {} void genCode(raw_ostream &OS, CodeGenParamAllocator &ParamAlloc) const override { OS << CallPrefix; const char *Sep = ""; for (unsigned i = 0, e = Args.size(); i < e; ++i) { Ptr Arg = Args[i]; - auto it = IntConstantArgs.find(i); - if (it != IntConstantArgs.end()) { - assert(Arg->hasIntegerConstantValue()); - OS << Sep << "static_cast<" << it->second << ">(" - << ParamAlloc.allocParam("unsigned", - utostr(Arg->integerConstantValue())) - << ")"; + auto it = IntegerArgs.find(i); + + OS << Sep; + Sep = ", "; + + if (it != IntegerArgs.end()) { + if (Arg->hasIntegerConstantValue()) + OS << "static_cast<" << it->second << ">(" + << ParamAlloc.allocParam(it->second, + utostr(Arg->integerConstantValue())) + << ")"; + else if (Arg->hasIntegerValue()) + OS << ParamAlloc.allocParam(it->second, + Arg->getIntegerValue(it->second)); } else { - OS << Sep << Arg->varname(); + OS << Arg->varname(); } Sep = ", "; } @@ -662,7 +679,7 @@ void morePrerequisites(std::vector &output) const override { for (unsigned i = 0, e = Args.size(); i < e; ++i) { Ptr Arg = Args[i]; - if (IntConstantArgs.find(i) != IntConstantArgs.end()) + if (IntegerArgs.find(i) != IntegerArgs.end()) continue; output.push_back(Arg); } @@ -981,8 +998,8 @@ const Type *Param); Result::Ptr getCodeForDagArg(DagInit *D, unsigned ArgNum, const Result::Scope &Scope, const Type *Param); - Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType, - bool Promote); + Result::Ptr getCodeForArg(unsigned ArgNum, const Type *ArgType, bool Promote, + bool Immediate); // Constructor and top-level functions. @@ -1155,17 +1172,17 @@ Args.push_back(getCodeForDagArg(D, i, Scope, Param)); if (Op->isSubClassOf("IRBuilderBase")) { std::set AddressArgs; - std::map IntConstantArgs; + std::map IntegerArgs; for (Record *sp : Op->getValueAsListOfDefs("special_params")) { unsigned Index = sp->getValueAsInt("index"); if (sp->isSubClassOf("IRBuilderAddrParam")) { AddressArgs.insert(Index); } else if (sp->isSubClassOf("IRBuilderIntParam")) { - IntConstantArgs[Index] = sp->getValueAsString("type"); + IntegerArgs[Index] = sp->getValueAsString("type"); } } - return std::make_shared( - Op->getValueAsString("prefix"), Args, AddressArgs, IntConstantArgs); + return std::make_shared(Op->getValueAsString("prefix"), + Args, AddressArgs, IntegerArgs); } else if (Op->isSubClassOf("IRIntBase")) { std::vector ParamTypes; for (Record *RParam : Op->getValueAsListOfDefs("params")) @@ -1215,9 +1232,9 @@ } Result::Ptr MveEmitter::getCodeForArg(unsigned ArgNum, const Type *ArgType, - bool Promote) { - Result::Ptr V = - std::make_shared(ArgNum, isa(ArgType)); + bool Promote, bool Immediate) { + Result::Ptr V = std::make_shared( + ArgNum, isa(ArgType), Immediate); if (Promote) { if (const auto *ST = dyn_cast(ArgType)) { @@ -1291,17 +1308,14 @@ const Type *ArgType = ME.getType(TypeInit, Param); ArgTypes.push_back(ArgType); - // The argument will usually have a name in the arguments dag, which goes - // into the variable-name scope that the code gen will refer to. - StringRef ArgName = ArgsDag->getArgNameStr(i); - if (!ArgName.empty()) - Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote); - // If the argument is a subclass of Immediate, record the details about // what values it can take, for Sema checking. + bool Immediate = false; if (auto TypeDI = dyn_cast(TypeInit)) { Record *TypeRec = TypeDI->getDef(); if (TypeRec->isSubClassOf("Immediate")) { + Immediate = true; + Record *Bounds = TypeRec->getValueAsDef("bounds"); ImmediateArg &IA = ImmediateArgs[i]; if (Bounds->isSubClassOf("IB_ConstRange")) { @@ -1315,7 +1329,7 @@ IA.boundsType = ImmediateArg::BoundsType::ExplicitRange; IA.i1 = 0; IA.i2 = 128 / Param->sizeInBits() - 1; - } else if (Bounds->getName() == "IB_EltBit") { + } else if (Bounds->isSubClassOf("IB_EltBit")) { IA.boundsType = ImmediateArg::BoundsType::ExplicitRange; IA.i1 = Bounds->getValueAsInt("base"); IA.i2 = IA.i1 + Param->sizeInBits() - 1; @@ -1332,6 +1346,12 @@ } } } + + // The argument will usually have a name in the arguments dag, which goes + // into the variable-name scope that the code gen will refer to. + StringRef ArgName = ArgsDag->getArgNameStr(i); + if (!ArgName.empty()) + Scope[ArgName] = ME.getCodeForArg(i, ArgType, Promote, Immediate); } // Finally, go through the codegen dag and translate it into a Result object diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -913,6 +913,14 @@ [], [llvm_anyptr_ty, llvm_anyvector_ty, llvm_anyvector_ty, llvm_i32_ty, llvm_i32_ty], llvm_anyvector_ty, [IntrWriteMem]>; +def int_arm_mve_shl_imm_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_i32_ty, llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; +def int_arm_mve_shr_imm_predicated: Intrinsic<[llvm_anyvector_ty], + [LLVMMatchType<0>, llvm_i32_ty, llvm_i32_ty, // extra i32 is unsigned flag + llvm_anyvector_ty, LLVMMatchType<0>], + [IntrNoMem]>; + // MVE scalar shifts. class ARM_MVE_qrshift_single value, list saturate = []> : diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2816,27 +2816,39 @@ let Inst{21} = 0b1; } +multiclass MVE_immediate_shift_patterns_inner< + MVEVectorVTInfo VTI, Operand imm_operand_type, SDNode unpred_op, + Intrinsic pred_int, Instruction inst, list unsignedFlag = []> { + + def : Pat<(VTI.Vec (unpred_op (VTI.Vec MQPR:$src), imm_operand_type:$imm)), + (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm))>; + + def : Pat<(VTI.Vec !con((pred_int (VTI.Vec MQPR:$src), imm_operand_type:$imm), + !dag(pred_int, unsignedFlag, ?), + (pred_int (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))), + (VTI.Vec (inst (VTI.Vec MQPR:$src), imm_operand_type:$imm, + ARMVCCThen, (VTI.Pred VCCR:$mask), + (VTI.Vec MQPR:$inactive)))>; +} + +multiclass MVE_immediate_shift_patterns { + defm : MVE_immediate_shift_patterns_inner("MVE_VSHL_immi" # VTI.BitsSuffix)>; + defm : MVE_immediate_shift_patterns_inner("MVE_VSHR_immu" # VTI.BitsSuffix), [1]>; + defm : MVE_immediate_shift_patterns_inner("MVE_VSHR_imms" # VTI.BitsSuffix), [0]>; +} + let Predicates = [HasMVEInt] in { - def : Pat<(v4i32 (ARMvshlImm (v4i32 MQPR:$src), imm0_31:$imm)), - (v4i32 (MVE_VSHL_immi32 (v4i32 MQPR:$src), imm0_31:$imm))>; - def : Pat<(v8i16 (ARMvshlImm (v8i16 MQPR:$src), imm0_15:$imm)), - (v8i16 (MVE_VSHL_immi16 (v8i16 MQPR:$src), imm0_15:$imm))>; - def : Pat<(v16i8 (ARMvshlImm (v16i8 MQPR:$src), imm0_7:$imm)), - (v16i8 (MVE_VSHL_immi8 (v16i8 MQPR:$src), imm0_7:$imm))>; - - def : Pat<(v4i32 (ARMvshruImm (v4i32 MQPR:$src), imm0_31:$imm)), - (v4i32 (MVE_VSHR_immu32 (v4i32 MQPR:$src), imm0_31:$imm))>; - def : Pat<(v8i16 (ARMvshruImm (v8i16 MQPR:$src), imm0_15:$imm)), - (v8i16 (MVE_VSHR_immu16 (v8i16 MQPR:$src), imm0_15:$imm))>; - def : Pat<(v16i8 (ARMvshruImm (v16i8 MQPR:$src), imm0_7:$imm)), - (v16i8 (MVE_VSHR_immu8 (v16i8 MQPR:$src), imm0_7:$imm))>; - - def : Pat<(v4i32 (ARMvshrsImm (v4i32 MQPR:$src), imm0_31:$imm)), - (v4i32 (MVE_VSHR_imms32 (v4i32 MQPR:$src), imm0_31:$imm))>; - def : Pat<(v8i16 (ARMvshrsImm (v8i16 MQPR:$src), imm0_15:$imm)), - (v8i16 (MVE_VSHR_imms16 (v8i16 MQPR:$src), imm0_15:$imm))>; - def : Pat<(v16i8 (ARMvshrsImm (v16i8 MQPR:$src), imm0_7:$imm)), - (v16i8 (MVE_VSHR_imms8 (v16i8 MQPR:$src), imm0_7:$imm))>; + defm : MVE_immediate_shift_patterns; + defm : MVE_immediate_shift_patterns; + defm : MVE_immediate_shift_patterns; } // end of mve_shift instructions diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/vector-shift-imm.ll @@ -0,0 +1,398 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @test_vshlq_n_s8(<16 x i8> %a) { +; CHECK-LABEL: test_vshlq_n_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.i8 q0, q0, #5 +; CHECK-NEXT: bx lr +entry: + %0 = shl <16 x i8> %a, + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshlq_n_s16(<8 x i16> %a) { +; CHECK-LABEL: test_vshlq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.i16 q0, q0, #5 +; CHECK-NEXT: bx lr +entry: + %0 = shl <8 x i16> %a, + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vshlq_n_s32(<4 x i32> %a) { +; CHECK-LABEL: test_vshlq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshl.i32 q0, q0, #18 +; CHECK-NEXT: bx lr +entry: + %0 = shl <4 x i32> %a, + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_s8(<16 x i8> %a) { +; CHECK-LABEL: test_vshrq_n_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.s8 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = ashr <16 x i8> %a, + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_s16(<8 x i16> %a) { +; CHECK-LABEL: test_vshrq_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.s16 q0, q0, #10 +; CHECK-NEXT: bx lr +entry: + %0 = ashr <8 x i16> %a, + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_s32(<4 x i32> %a) { +; CHECK-LABEL: test_vshrq_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.s32 q0, q0, #19 +; CHECK-NEXT: bx lr +entry: + %0 = ashr <4 x i32> %a, + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrq_n_u8(<16 x i8> %a) { +; CHECK-LABEL: test_vshrq_n_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.u8 q0, q0, #1 +; CHECK-NEXT: bx lr +entry: + %0 = lshr <16 x i8> %a, + ret <16 x i8> %0 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrq_n_u16(<8 x i16> %a) { +; CHECK-LABEL: test_vshrq_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.u16 q0, q0, #10 +; CHECK-NEXT: bx lr +entry: + %0 = lshr <8 x i16> %a, + ret <8 x i16> %0 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vshrq_n_u32(<4 x i32> %a) { +; CHECK-LABEL: test_vshrq_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vshr.u32 q0, q0, #10 +; CHECK-NEXT: bx lr +entry: + %0 = lshr <4 x i32> %a, + ret <4 x i32> %0 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshlq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshlq_m_n_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshlt.i8 q0, q1, #6 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 6, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshlq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshlq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshlt.i16 q0, q1, #13 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 13, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vshlq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshlq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshlt.i32 q0, q1, #0 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 0, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_s8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_m_n_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.s8 q0, q1, #2 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 2, i32 0, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_s16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_m_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.s16 q0, q1, #3 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 3, i32 0, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_s32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_m_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.s32 q0, q1, #13 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, i32 0, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrq_m_n_u8(<16 x i8> %inactive, <16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_m_n_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.u8 q0, q1, #4 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 1, <16 x i1> %1, <16 x i8> %inactive) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrq_m_n_u16(<8 x i16> %inactive, <8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_m_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.u16 q0, q1, #14 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 14, i32 1, <8 x i1> %1, <8 x i16> %inactive) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vshrq_m_n_u32(<4 x i32> %inactive, <4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_m_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.u32 q0, q1, #21 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 21, i32 1, <4 x i1> %1, <4 x i32> %inactive) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_s8(<16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshlq_x_n_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshlt.i8 q0, q0, #1 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 1, <16 x i1> %1, <16 x i8> undef) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_s16(<8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshlq_x_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshlt.i16 q0, q0, #15 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 15, <8 x i1> %1, <8 x i16> undef) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_s32(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshlq_x_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshlt.i32 q0, q0, #13 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 13, <4 x i1> %1, <4 x i32> undef) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshlq_x_n_u8(<16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshlq_x_n_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshlt.i8 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, <16 x i1> %1, <16 x i8> undef) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshlq_x_n_u16(<8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshlq_x_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshlt.i16 q0, q0, #10 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, <8 x i1> %1, <8 x i16> undef) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vshlq_x_n_u32(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshlq_x_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshlt.i32 q0, q0, #30 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 30, <4 x i1> %1, <4 x i32> undef) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_s8(<16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_x_n_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.s8 q0, q0, #4 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 4, i32 0, <16 x i1> %1, <16 x i8> undef) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_s16(<8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_x_n_s16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.s16 q0, q0, #10 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 10, i32 0, <8 x i1> %1, <8 x i16> undef) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_s32(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_x_n_s32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.s32 q0, q0, #7 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 7, i32 0, <4 x i1> %1, <4 x i32> undef) + ret <4 x i32> %2 +} + +define arm_aapcs_vfpcc <16 x i8> @test_vshrq_x_n_u8(<16 x i8> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_x_n_u8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.u8 q0, q0, #7 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32 %0) + %2 = tail call <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8> %a, i32 7, i32 1, <16 x i1> %1, <16 x i8> undef) + ret <16 x i8> %2 +} + +define arm_aapcs_vfpcc <8 x i16> @test_vshrq_x_n_u16(<8 x i16> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_x_n_u16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.u16 q0, q0, #7 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0) + %2 = tail call <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16> %a, i32 7, i32 1, <8 x i1> %1, <8 x i16> undef) + ret <8 x i16> %2 +} + +define arm_aapcs_vfpcc <4 x i32> @test_vshrq_x_n_u32(<4 x i32> %a, i16 zeroext %p) { +; CHECK-LABEL: test_vshrq_x_n_u32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmsr p0, r0 +; CHECK-NEXT: vpst +; CHECK-NEXT: vshrt.u32 q0, q0, #6 +; CHECK-NEXT: bx lr +entry: + %0 = zext i16 %p to i32 + %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0) + %2 = tail call <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32> %a, i32 6, i32 1, <4 x i1> %1, <4 x i32> undef) + ret <4 x i32> %2 +} + +declare <16 x i1> @llvm.arm.mve.pred.i2v.v16i1(i32) +declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32) +declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32) + +declare <16 x i8> @llvm.arm.mve.shl.imm.predicated.v16i8.v16i1(<16 x i8>, i32, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.shl.imm.predicated.v8i16.v8i1(<8 x i16>, i32, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.shl.imm.predicated.v4i32.v4i1(<4 x i32>, i32, <4 x i1>, <4 x i32>) + +declare <16 x i8> @llvm.arm.mve.shr.imm.predicated.v16i8.v16i1(<16 x i8>, i32, i32, <16 x i1>, <16 x i8>) +declare <8 x i16> @llvm.arm.mve.shr.imm.predicated.v8i16.v8i1(<8 x i16>, i32, i32, <8 x i1>, <8 x i16>) +declare <4 x i32> @llvm.arm.mve.shr.imm.predicated.v4i32.v4i1(<4 x i32>, i32, i32, <4 x i1>, <4 x i32>)