diff --git a/clang/include/clang/Basic/arm_mve.td b/clang/include/clang/Basic/arm_mve.td
--- a/clang/include/clang/Basic/arm_mve.td
+++ b/clang/include/clang/Basic/arm_mve.td
@@ -116,6 +116,28 @@
   NameOverride<"vmulq">;
 }

+let params = !listconcat(T.Int16, T.Int32) in {
+  let pnt = PNT_None in {
+    def vmvnq_n: Intrinsic<Vector, (args imm_simd_vmvn:$imm),
+                           (not (splat (Scalar $imm)))>;
+  }
+  defm vmvnq: IntrinsicMX<Vector, (args imm_simd_vmvn:$imm, Predicate:$pred),
+                          (select $pred, (not (splat (Scalar $imm))),
+                                  $inactive), 1, "_n", PNT_NType, PNT_None>;
+  let pnt = PNT_NType in {
+    def vbicq_n: Intrinsic<
+        Vector, (args Vector:$v, imm_simd_restrictive:$imm),
+        (and $v, (not (splat (Scalar $imm))))>;
+    def vorrq_n: Intrinsic<
+        Vector, (args Vector:$v, imm_simd_restrictive:$imm),
+        (or $v, (splat (Scalar $imm)))>;
+  }
+  def vbicq_m_n: Intrinsic<
+    Vector, (args Vector:$v, imm_simd_restrictive:$imm, Predicate:$pred),
+    (select $pred, (and $v, (not (splat (Scalar $imm)))), $v)>;
+  def vorrq_m_n: Intrinsic<
+    Vector, (args Vector:$v, imm_simd_restrictive:$imm, Predicate:$pred),
+    (select $pred, (or $v, (splat (Scalar $imm))), $v)>;
+}
+
 // The bitcasting below is not overcomplicating the IR because while
 // Vector and UVector may be different vector types at the C level i.e.
 // vectors of same size signed/unsigned ints. Once they're lowered
diff --git a/clang/include/clang/Basic/arm_mve_defs.td b/clang/include/clang/Basic/arm_mve_defs.td
--- a/clang/include/clang/Basic/arm_mve_defs.td
+++ b/clang/include/clang/Basic/arm_mve_defs.td
@@ -319,6 +319,7 @@
   int base = base_;
   Type type = type_;
 }
+def IB_ExtraArg_LaneSize;

 // -----------------------------------------------------------------------------
 // End-user definitions for immediate arguments.
@@ -327,11 +328,13 @@
 // intrinsics like vmvnq or vorrq. imm_simd_restrictive has to be an 8-bit
 // value shifted left by a whole number of bytes; imm_simd_vmvn can also be of
 // the form 0xXXFF for some byte value XX.
-def imm_simd_restrictive : Immediate<Scalar, IB_UEltValue> {
+def imm_simd_restrictive : Immediate<Scalar, IB_ExtraArg_LaneSize> {
   let extra = "ShiftedByte";
+  let extraarg = "!lanesize";
 }
-def imm_simd_vmvn : Immediate<Scalar, IB_UEltValue> {
+def imm_simd_vmvn : Immediate<Scalar, IB_ExtraArg_LaneSize> {
   let extra = "ShiftedByteOrXXFF";
+  let extraarg = "!lanesize";
 }

 // imm_1toN can take any value from 1 to N inclusive, where N is the number of
@@ -457,26 +460,31 @@

 // A wrapper to define both _m and _x versions of a predicated
 // intrinsic.
+//
+// We provide optional parameters to override the polymorphic name
+// types separately for the _m and _x variants, because sometimes they
+// polymorph differently (typically because the type of the inactive
+// parameter can be used as a disambiguator if it's present).
 multiclass IntrinsicMX<Type rettype, dag arguments, dag cg,
                        int wantXVariant = 1, string nameSuffix = "",
                        PolymorphicNameType pnt_m = PNT_Type,
                        PolymorphicNameType pnt_x = PNT_Type> {
   // The _m variant takes an initial parameter called $inactive, which
   // provides the input value of the output register, i.e. all the
   // inactive lanes in the predicated operation take their values from
   // this.
   def "_m" # nameSuffix:
-    Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg>;
+    Intrinsic<rettype, !con((args rettype:$inactive), arguments), cg> {
+    let pnt = pnt_m;
+  }

   foreach unusedVar = !if(!eq(wantXVariant, 1), [1], []) in {
     // The _x variant leaves off that parameter, and simply uses an
     // undef value of the same type.
+
     def "_x" # nameSuffix:
-      Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)> {
-        // Allow overriding of the polymorphic name type, because
-        // sometimes the _m and _x variants polymorph differently
-        // (typically because the type of the inactive parameter can be
-        // used as a disambiguator if it's present).
+      Intrinsic<rettype, arguments, (seq (undef rettype):$inactive, cg)> {
         let pnt = pnt_x;
       }
   }
 }
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -11670,8 +11670,10 @@
   bool SemaBuiltinConstantArgMultiple(CallExpr *TheCall, int ArgNum,
                                       unsigned Multiple);
   bool SemaBuiltinConstantArgPower2(CallExpr *TheCall, int ArgNum);
-  bool SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum);
-  bool SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum);
+  bool SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum,
+                                         unsigned ArgBits);
+  bool SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall, int ArgNum,
+                                               unsigned ArgBits);
   bool SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall,
                                 int ArgNum, unsigned ExpectedFieldNum,
                                 bool AllowName);
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -5592,7 +5592,8 @@
 /// SemaBuiltinConstantArgShiftedByte - Check if argument ArgNum of TheCall is
 /// a constant expression representing an arbitrary byte value shifted left by
 /// a multiple of 8 bits.
-bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum) {
+bool Sema::SemaBuiltinConstantArgShiftedByte(CallExpr *TheCall, int ArgNum,
+                                             unsigned ArgBits) {
   llvm::APSInt Result;

   // We can't check the value of a dependent argument.
@@ -5604,6 +5605,10 @@
   if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
     return true;

+  // Truncate to the given size.
+  Result = Result.getLoBits(ArgBits);
+  Result.setIsUnsigned(true);
+
   if (IsShiftedByte(Result))
     return false;

@@ -5617,7 +5622,8 @@
 /// 0x00FF, 0x01FF, ..., 0xFFFF). This strange range check is needed for some
 /// Arm MVE intrinsics.
 bool Sema::SemaBuiltinConstantArgShiftedByteOrXXFF(CallExpr *TheCall,
-                                                   int ArgNum) {
+                                                   int ArgNum,
+                                                   unsigned ArgBits) {
   llvm::APSInt Result;

   // We can't check the value of a dependent argument.
@@ -5629,6 +5635,10 @@
   if (SemaBuiltinConstantArg(TheCall, ArgNum, Result))
     return true;

+  // Truncate to the given size.
+  Result = Result.getLoBits(ArgBits);
+  Result.setIsUnsigned(true);
+
   // Check to see if it's in either of the required forms.
   if (IsShiftedByte(Result) ||
       (Result > 0 && Result < 0x10000 && (Result & 0xFF) == 0xFF))
diff --git a/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c b/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c
new file mode 100644
--- /dev/null
+++ b/clang/test/CodeGen/arm-mve-intrinsics/bitwise-imm.c
@@ -0,0 +1,402 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+// RUN: %clang_cc1 -triple thumbv8.1m.main-arm-none-eabi -target-feature +mve.fp -mfloat-abi hard -fallow-half-arguments-and-returns -O0 -disable-O0-optnone -DPOLYMORPHIC -S -emit-llvm -o - %s | opt -S -mem2reg | FileCheck %s
+
+#include <arm_mve.h>
+
+// CHECK-LABEL: @test_vbicq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], <i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007, i16 11007>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vbicq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0xd500);
+#else /* POLYMORPHIC */
+    return vbicq_n_s16(a, 0xd500);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], <i32 -252, i32 -252, i32 -252, i32 -252>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vbicq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0xfb);
+#else /* POLYMORPHIC */
+    return vbicq_n_s32(a, 0xfb);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <8 x i16> [[A:%.*]], <i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243, i16 -243>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vbicq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0xf2);
+#else /* POLYMORPHIC */
+    return vbicq_n_u16(a, 0xf2);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[A:%.*]], <i32 -8193, i32 -8193, i32 -8193, i32 -8193>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vbicq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vbicq(a, 0x2000);
+#else /* POLYMORPHIC */
+    return vbicq_n_u32(a, 0x2000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], <i16 195, i16 195, i16 195, i16 195, i16 195, i16 195, i16 195, i16 195>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+int16x8_t test_vorrq_n_s16(int16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0xc3);
+#else /* POLYMORPHIC */
+    return vorrq_n_s16(a, 0xc3);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], <i32 65536, i32 65536, i32 65536, i32 65536>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+int32x4_t test_vorrq_n_s32(int32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0x10000);
+#else /* POLYMORPHIC */
+    return vorrq_n_s32(a, 0x10000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <8 x i16> [[A:%.*]], <i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096, i16 -4096>
+// CHECK-NEXT:    ret <8 x i16> [[TMP0]]
+//
+uint16x8_t test_vorrq_n_u16(uint16x8_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0xf000);
+#else /* POLYMORPHIC */
+    return vorrq_n_u16(a, 0xf000);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = or <4 x i32> [[A:%.*]], <i32 8978432, i32 8978432, i32 8978432, i32 8978432>
+// CHECK-NEXT:    ret <4 x i32> [[TMP0]]
+//
+uint32x4_t test_vorrq_n_u32(uint32x4_t a)
+{
+#ifdef POLYMORPHIC
+    return vorrq(a, 0x890000);
+#else /* POLYMORPHIC */
+    return vorrq_n_u32(a, 0x890000);
+#endif /* POLYMORPHIC */
+}
+
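+// Note: the vbicq/vorrq immediates above must be an 8-bit value shifted
+// left by a whole number of bytes. The vmvnq_n immediates tested below may
+// additionally take the form 0x??FF (an arbitrary byte followed by a set
+// low byte); the rejected cases are covered by
+// clang/test/Sema/arm-mve-immediates.c.
+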
+// CHECK-LABEL: @test_vmvnq_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> <i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391, i16 27391>
+//
+int16x8_t test_vmvnq_n_s16()
+{
+    return vmvnq_n_s16(0x9500);
+}
+
+// CHECK-LABEL: @test_vmvnq_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 -5570561, i32 -5570561, i32 -5570561, i32 -5570561>
+//
+int32x4_t test_vmvnq_n_s32()
+{
+    return vmvnq_n_s32(0x550000);
+}
+
+// CHECK-LABEL: @test_vmvnq_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <8 x i16> <i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689, i16 -18689>
+//
+uint16x8_t test_vmvnq_n_u16()
+{
+    return vmvnq_n_u16(0x4900);
+}
+
+// CHECK-LABEL: @test_vmvnq_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    ret <4 x i32> <i32 1023410175, i32 1023410175, i32 1023410175, i32 1023410175>
+//
+uint32x4_t test_vmvnq_n_u32()
+{
+    return vmvnq_n_u32(0xc3000000);
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], <i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265, i16 -11265>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+int16x8_t test_vbicq_m_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0x2c00, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_s16(a, 0x2c00, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], <i32 -13893633, i32 -13893633, i32 -13893633, i32 -13893633>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+int32x4_t test_vbicq_m_n_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0xd40000, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_s32(a, 0xd40000, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i16> [[A:%.*]], <i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37, i16 -37>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+uint16x8_t test_vbicq_m_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0x24, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_u16(a, 0x24, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vbicq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i32> [[A:%.*]], <i32 -1644167169, i32 -1644167169, i32 -1644167169, i32 -1644167169>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+uint32x4_t test_vbicq_m_n_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vbicq_m_n(a, 0x62000000, p);
+#else /* POLYMORPHIC */
+    return vbicq_m_n_u32(a, 0x62000000, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], <i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568, i16 13568>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+int16x8_t test_vorrq_m_n_s16(int16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0x3500, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_s16(a, 0x3500, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], <i32 654311424, i32 654311424, i32 654311424, i32 654311424>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+int32x4_t test_vorrq_m_n_s32(int32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0x27000000, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_s32(a, 0x27000000, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <8 x i16> [[A:%.*]], <i16 175, i16 175, i16 175, i16 175, i16 175, i16 175, i16 175, i16 175>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[TMP2]], <8 x i16> [[A]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+uint16x8_t test_vorrq_m_n_u16(uint16x8_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0xaf, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_u16(a, 0xaf, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vorrq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[A:%.*]], <i32 89, i32 89, i32 89, i32 89>
+// CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP2]], <4 x i32> [[A]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+uint32x4_t test_vorrq_m_n_u32(uint32x4_t a, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vorrq_m_n(a, 0x59, p);
+#else /* POLYMORPHIC */
+    return vorrq_m_n_u32(a, 0x59, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841, i16 -3841>, <8 x i16> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_m_n_s16(int16x8_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0xf00, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_s16(inactive, 0xf00, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -18945, i32 -18945, i32 -18945, i32 -18945>, <4 x i32> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_m_n_s32(int32x4_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0x4a00, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_s32(inactive, 0x4a00, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295, i16 23295>, <8 x i16> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_m_n_u16(uint16x8_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0xa500, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_u16(inactive, 0xa500, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_m_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -63489, i32 -63489, i32 -63489, i32 -63489>, <4 x i32> [[INACTIVE:%.*]]
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_m_n_u32(uint32x4_t inactive, mve_pred16_t p)
+{
+#ifdef POLYMORPHIC
+    return vmvnq_m(inactive, 0xf800, p);
+#else /* POLYMORPHIC */
+    return vmvnq_m_n_u32(inactive, 0xf800, p);
+#endif /* POLYMORPHIC */
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_s16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 767, i16 767, i16 767, i16 767, i16 767, i16 767, i16 767, i16 767>, <8 x i16> undef
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+int16x8_t test_vmvnq_x_n_s16(mve_pred16_t p)
+{
+    return vmvnq_x_n_s16(0xfd00, p);
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_s32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -12189697, i32 -12189697, i32 -12189697, i32 -12189697>, <4 x i32> undef
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int32x4_t test_vmvnq_x_n_s32(mve_pred16_t p)
+{
+    return vmvnq_x_n_s32(0xba0000, p);
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_u16(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> <i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505, i16 -21505>, <8 x i16> undef
+// CHECK-NEXT:    ret <8 x i16> [[TMP2]]
+//
+uint16x8_t test_vmvnq_x_n_u16(mve_pred16_t p)
+{
+    return vmvnq_x_n_u16(0x5400, p);
+}
+
+// CHECK-LABEL: @test_vmvnq_x_n_u32(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[TMP0:%.*]] = zext i16 [[P:%.*]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> <i32 -4865, i32 -4865, i32 -4865, i32 -4865>, <4 x i32> undef
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+uint32x4_t test_vmvnq_x_n_u32(mve_pred16_t p)
+{
+    return vmvnq_x_n_u32(0x1300, p);
+}
+
diff --git a/clang/test/Sema/arm-mve-immediates.c b/clang/test/Sema/arm-mve-immediates.c
--- a/clang/test/Sema/arm-mve-immediates.c
+++ b/clang/test/Sema/arm-mve-immediates.c
@@ -203,3 +203,73 @@
   vsriq(vw, vw, 0); // expected-error {{argument value 0 is outside the valid range [1, 32]}}
   vsriq(vw, vw, 33); // expected-error {{argument value 33 is outside the valid range [1, 32]}}
 }
+
+void test_simd_bic_orr(int16x8_t h, int32x4_t w)
+{
+  h = vbicq(h, 0x0000);
+  h = vbicq(h, 0x0001);
+  h = vbicq(h, 0x00FF);
+  h = vbicq(h, 0x0100);
+  h = vbicq(h, 0x0101); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  h = vbicq(h, 0x01FF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  h = vbicq(h, 0xFF00);
+
+  w = vbicq(w, 0x00000000);
+  w = vbicq(w, 0x00000001);
+  w = vbicq(w, 0x000000FF);
+  w = vbicq(w, 0x00000100);
+  w = vbicq(w, 0x0000FF00);
+  w = vbicq(w, 0x00010000);
+  w = vbicq(w, 0x00FF0000);
+  w = vbicq(w, 0x01000000);
+  w = vbicq(w, 0xFF000000);
+  w = vbicq(w, 0x01000001); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  w = vbicq(w, 0x01FFFFFF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+
+  h = vorrq(h, 0x0000);
+  h = vorrq(h, 0x0001);
+  h = vorrq(h, 0x00FF);
+  h = vorrq(h, 0x0100);
+  h = vorrq(h, 0x0101); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  h = vorrq(h, 0x01FF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  h = vorrq(h, 0xFF00);
+
+  w = vorrq(w, 0x00000000);
+  w = vorrq(w, 0x00000001);
+  w = vorrq(w, 0x000000FF);
+  w = vorrq(w, 0x00000100);
+  w = vorrq(w, 0x0000FF00);
+  w = vorrq(w, 0x00010000);
+  w = vorrq(w, 0x00FF0000);
+  w = vorrq(w, 0x01000000);
+  w = vorrq(w, 0xFF000000);
+  w = vorrq(w, 0x01000001); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+  w = vorrq(w, 0x01FFFFFF); // expected-error-re {{argument should be an 8-bit value shifted by a multiple of 8 bits{{$}}}}
+}
+
+void test_simd_vmvn(void)
+{
+  uint16x8_t h;
+  h = vmvnq_n_u16(0x0000);
+  h = vmvnq_n_u16(0x0001);
+  h = vmvnq_n_u16(0x00FF);
+  h = vmvnq_n_u16(0x0100);
+  h = vmvnq_n_u16(0x0101); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+  h = vmvnq_n_u16(0x01FF);
+  h = vmvnq_n_u16(0xFF00);
+
+  uint32x4_t w;
+  w = vmvnq_n_u32(0x00000000);
+  w = vmvnq_n_u32(0x00000001);
+  w = vmvnq_n_u32(0x000000FF);
+  w = vmvnq_n_u32(0x00000100);
+  w = vmvnq_n_u32(0x0000FF00);
+  w = vmvnq_n_u32(0x00010000);
+  w = vmvnq_n_u32(0x00FF0000);
+  w = vmvnq_n_u32(0x01000000);
+  w = vmvnq_n_u32(0xFF000000);
+  w = vmvnq_n_u32(0x01000001); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+  w = vmvnq_n_u32(0x01FFFFFF); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+  w = vmvnq_n_u32(0x0001FFFF); // expected-error {{argument should be an 8-bit value shifted by a multiple of 8 bits, or in the form 0x??FF}}
+  w = vmvnq_n_u32(0x000001FF);
+}
diff --git a/clang/utils/TableGen/MveEmitter.cpp b/clang/utils/TableGen/MveEmitter.cpp
--- a/clang/utils/TableGen/MveEmitter.cpp
+++ b/clang/utils/TableGen/MveEmitter.cpp
@@ -883,38 +883,41 @@
       break;
     case ImmediateArg::BoundsType::UInt:
       lo = 0;
-      hi = IA.i1;
+      hi = llvm::APInt::getMaxValue(IA.i1).zext(128);
       break;
     }

-    llvm::APInt typelo, typehi;
-    unsigned Bits = IA.ArgType->sizeInBits();
-    if (cast<ScalarType>(IA.ArgType)->kind() == ScalarTypeKind::SignedInt) {
-      typelo = llvm::APInt::getSignedMinValue(Bits).sext(128);
-      typehi = llvm::APInt::getSignedMaxValue(Bits).sext(128);
-    } else {
-      typelo = llvm::APInt::getMinValue(Bits).zext(128);
-      typehi = llvm::APInt::getMaxValue(Bits).zext(128);
-    }
-
     std::string Index = utostr(kv.first);

-    if (lo.sle(typelo) && hi.sge(typehi))
-      SemaChecks.push_back("SemaBuiltinConstantArg(TheCall, " + Index + ")");
-    else
+    // Emit a range check if the legal range of values for the
+    // immediate is smaller than the _possible_ range of values for
+    // its type.
+    unsigned ArgTypeBits = IA.ArgType->sizeInBits();
+    llvm::APInt ArgTypeRange = llvm::APInt::getMaxValue(ArgTypeBits).zext(128);
+    llvm::APInt ActualRange = (hi-lo).trunc(64).sext(128);
+    if (ActualRange.ult(ArgTypeRange))
       SemaChecks.push_back("SemaBuiltinConstantArgRange(TheCall, " + Index +
                            ", " + signedHexLiteral(lo) + ", " +
                            signedHexLiteral(hi) + ")");

     if (!IA.ExtraCheckType.empty()) {
       std::string Suffix;
-      if (!IA.ExtraCheckArgs.empty())
-        Suffix = (Twine(", ") + IA.ExtraCheckArgs).str();
+      if (!IA.ExtraCheckArgs.empty()) {
+        std::string tmp;
+        StringRef Arg = IA.ExtraCheckArgs;
+        if (Arg == "!lanesize") {
+          tmp = utostr(IA.ArgType->sizeInBits());
+          Arg = tmp;
+        }
+        Suffix = (Twine(", ") + Arg).str();
+      }
       SemaChecks.push_back((Twine("SemaBuiltinConstantArg") +
                             IA.ExtraCheckType + "(TheCall, " + Index +
                             Suffix + ")")
                                .str());
     }
+
+    assert(!SemaChecks.empty());
   }
   if (SemaChecks.empty())
     return "";
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -12176,7 +12176,7 @@
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
-  if (BVN && Subtarget->hasNEON() &&
+  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
       BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
     if (SplatBitSize <= 64) {
       EVT VbicVT;
@@ -12483,7 +12483,7 @@
   APInt SplatBits, SplatUndef;
   unsigned SplatBitSize;
   bool HasAnyUndefs;
-  if (BVN && Subtarget->hasNEON() &&
+  if (BVN && (Subtarget->hasNEON() || Subtarget->hasMVEIntegerOps()) &&
      BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
     if (SplatBitSize <= 64) {
       EVT VorrVT;
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -274,6 +274,10 @@
 def ARMvmvnImm : SDNode<"ARMISD::VMVNIMM", SDTARMVMOVIMM>;
 def ARMvmovFPImm : SDNode<"ARMISD::VMOVFPIMM", SDTARMVMOVIMM>;

+def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
+                                         SDTCisVT<2, i32>]>;
+def ARMvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
+def ARMvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
 def SDTARMVSHIMM : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
                                         SDTCisVT<2, i32>]>;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -1367,37 +1367,51 @@
   let Inst{3-0} = imm{3-0};
 }

-class MVE_VORR<string suffix, bit hw, Operand imm_type>
-  : MVE_bit_cmode<"vorr", suffix, hw, (ins MQPR:$Qd_src, imm_type:$imm)> {
-  let Inst{5} = 0b0;
-  let validForTailPredication = 1;
+multiclass MVE_bit_cmode_p<string iname, bit opcode,
+                           MVEVectorVTInfo VTI, Operand imm_type, SDNode op> {
+  def "" : MVE_bit_cmode<iname, VTI.Suffix, VTI.Size{0},
+                         (ins MQPR:$Qd_src, imm_type:$imm)> {
+    let Inst{5} = opcode;
+    let validForTailPredication = 1;
+  }
+
+  defvar Inst = !cast<Instruction>(NAME);
+  defvar UnpredPat = (VTI.Vec (op (VTI.Vec MQPR:$src), timm:$simm));
+
+  let Predicates = [HasMVEInt] in {
+    def : Pat<UnpredPat,
+              (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm))>;
+    def : Pat<(VTI.Vec (vselect (VTI.Pred VCCR:$pred),
+                                UnpredPat, (VTI.Vec MQPR:$src))),
+              (VTI.Vec (Inst (VTI.Vec MQPR:$src), imm_type:$simm,
+                             ARMVCCThen, (VTI.Pred VCCR:$pred)))>;
+  }
+}
+
+multiclass MVE_VORRimm<MVEVectorVTInfo VTI, Operand imm_type> {
+  defm "": MVE_bit_cmode_p<"vorr", 0, VTI, imm_type, ARMvorrImm>;
+}
+multiclass MVE_VBICimm<MVEVectorVTInfo VTI, Operand imm_type> {
+  defm "": MVE_bit_cmode_p<"vbic", 1, VTI, imm_type, ARMvbicImm>;
 }

-def MVE_VORRimmi16 : MVE_VORR<"i16", 1, nImmSplatI16>;
-def MVE_VORRimmi32 : MVE_VORR<"i32", 0, nImmSplatI32>;
+defm MVE_VORRimmi16 : MVE_VORRimm<MVE_v8i16, nImmSplatI16>;
+defm MVE_VORRimmi32 : MVE_VORRimm<MVE_v4i32, nImmSplatI32>;
+defm MVE_VBICimmi16 : MVE_VBICimm<MVE_v8i16, nImmSplatI16>;
+defm MVE_VBICimmi32 : MVE_VBICimm<MVE_v4i32, nImmSplatI32>;

 def MVE_VORNimmi16 : MVEInstAlias<"vorn${vp}.i16\t$Qd, $imm",
                 (MVE_VORRimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
 def MVE_VORNimmi32 : MVEInstAlias<"vorn${vp}.i32\t$Qd, $imm",
                 (MVE_VORRimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;

-def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
-    (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
-
-class MVE_VBIC<string suffix, bit hw, Operand imm_type>
-  : MVE_bit_cmode<"vbic", suffix, hw, (ins MQPR:$Qd_src, imm_type:$imm)> {
-  let Inst{5} = 0b1;
-  let validForTailPredication = 1;
-}
-
-def MVE_VBICimmi16 : MVE_VBIC<"i16", 1, nImmSplatI16>;
-def MVE_VBICimmi32 : MVE_VBIC<"i32", 0, nImmSplatI32>;
-
 def MVE_VANDimmi16 : MVEInstAlias<"vand${vp}.i16\t$Qd, $imm",
                 (MVE_VBICimmi16 MQPR:$Qd, nImmSplatNotI16:$imm, vpred_n:$vp), 0>;
 def MVE_VANDimmi32 : MVEInstAlias<"vand${vp}.i32\t$Qd, $imm",
                 (MVE_VBICimmi32 MQPR:$Qd, nImmSplatNotI32:$imm, vpred_n:$vp), 0>;

+def MVE_VMOV : MVEInstAlias<"vmov${vp}\t$Qd, $Qm",
+    (MVE_VORR MQPR:$Qd, MQPR:$Qm, MQPR:$Qm, vpred_r:$vp)>;
+
 class MVE_VMOV_lane_direction {
   bit bit_20;
   dag oops;
@@ -2206,6 +2220,15 @@
   def : Pat<(v4f32 (ARMvmovFPImm timm:$simm)),
             (v4f32 (MVE_VMOVimmf32 nImmVMOVF32:$simm))>;
+
+  def : Pat<(v8i16 (vselect (v8i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
+                            MQPR:$inactive)),
+            (v8i16 (MVE_VMVNimmi16 nImmSplatI16:$simm,
+                            ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
+  def : Pat<(v4i32 (vselect (v4i1 VCCR:$pred), (ARMvmvnImm timm:$simm),
+                            MQPR:$inactive)),
+            (v4i32 (MVE_VMVNimmi32 nImmSplatI32:$simm,
+                            ARMVCCThen, VCCR:$pred, MQPR:$inactive))>;
 }

 class MVE_VMINMAXA<string iname, string suffix, bits<2> size,
@@ -2341,7 +2364,7 @@
   def : Pat<(and (v4i32 MQPR:$src), (v4i32 (ARMvmovImm (i32 0xCFF)))),
             (MVE_VMOVLu16bh MQPR:$src)>;
   // zext_inreg 8 -> 16
-  def : Pat<(and (v8i16 MQPR:$src), (v8i16 (ARMvmovImm (i32 0x8FF)))),
+  def : Pat<(ARMvbicImm (v8i16 MQPR:$src), (i32 0xAFF)),
             (MVE_VMOVLu8bh MQPR:$src)>;
 }

diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -509,11 +509,6 @@
 def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
 def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;

-def SDTARMVORRIMM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
-                                         SDTCisVT<2, i32>]>;
-def NEONvorrImm : SDNode<"ARMISD::VORRIMM", SDTARMVORRIMM>;
-def NEONvbicImm : SDNode<"ARMISD::VBICIMM", SDTARMVORRIMM>;
-
 def NEONvbsl : SDNode<"ARMISD::VBSL",
                       SDTypeProfile<1, 3, [SDTCisVec<0>,
                                            SDTCisSameAs<0, 1>,
@@ -5296,7 +5291,7 @@
                           IIC_VMOVImm,
                           "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                               (v4i16 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+                               (v4i16 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }

@@ -5305,7 +5300,7 @@
                           IIC_VMOVImm,
                           "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                               (v2i32 (NEONvorrImm DPR:$src, timm:$SIMM)))]> {
+                               (v2i32 (ARMvorrImm DPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }

@@ -5314,7 +5309,7 @@
                           IIC_VMOVImm,
                           "vorr", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                               (v8i16 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+                               (v8i16 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }

@@ -5323,7 +5318,7 @@
                           IIC_VMOVImm,
                           "vorr", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                               (v4i32 (NEONvorrImm QPR:$src, timm:$SIMM)))]> {
+                               (v4i32 (ARMvorrImm QPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }

@@ -5347,7 +5342,7 @@
                           IIC_VMOVImm,
                           "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                               (v4i16 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+                               (v4i16 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }

@@ -5356,7 +5351,7 @@
                           IIC_VMOVImm,
                           "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set DPR:$Vd,
-                               (v2i32 (NEONvbicImm DPR:$src, timm:$SIMM)))]> {
+                               (v2i32 (ARMvbicImm DPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }

@@ -5365,7 +5360,7 @@
                           IIC_VMOVImm,
                           "vbic", "i16", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                               (v8i16 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+                               (v8i16 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
   let Inst{9} = SIMM{9};
 }

@@ -5374,7 +5369,7 @@
                           IIC_VMOVImm,
                           "vbic", "i32", "$Vd, $SIMM", "$src = $Vd",
                           [(set QPR:$Vd,
-                               (v4i32 (NEONvbicImm QPR:$src, timm:$SIMM)))]> {
+                               (v4i32 (ARMvbicImm QPR:$src, timm:$SIMM)))]> {
   let Inst{10-9} = SIMM{10-9};
 }

diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/bitwise-imm.ll
@@ -0,0 +1,365 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -verify-machineinstrs -o - %s | FileCheck %s
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh0(<8 x i16> %a) {
+; CHECK-LABEL: test_vbicq_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <8 x i16> %a, <i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_n_u16_sh8(<8 x i16> %a) {
+; CHECK-LABEL: test_vbicq_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <8 x i16> %a, <i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh0(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -101, i32 -101, i32 -101, i32 -101>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh8(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -25601, i32 -25601, i32 -25601, i32 -25601>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh16(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -6553601, i32 -6553601, i32 -6553601, i32 -6553601>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_sh24(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vbic.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -1677721601, i32 -1677721601, i32 -1677721601, i32 -1677721601>
+  ret <4 x i32> %0
+}
+
+; The immediate in this case is legal for a VMVN but not for a VBIC,
+; so we expect to see the constant being prepared in another register.
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_n_u32_illegal(<4 x i32> %a) {
+; CHECK-LABEL: test_vbicq_n_u32_illegal:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn.i32 q1, #0x54ff
+; CHECK-NEXT:    vand q0, q0, q1
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = and <4 x i32> %a, <i32 -21760, i32 -21760, i32 -21760, i32 -21760>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_n_u16_sh0(<8 x i16> %a) {
+; CHECK-LABEL: test_vorrq_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_n_u16_sh8(<8 x i16> %a) {
+; CHECK-LABEL: test_vorrq_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <8 x i16> %a, <i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600>
+  ret <8 x i16> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh0(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh8(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 25600, i32 25600, i32 25600, i32 25600>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh16(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 6553600, i32 6553600, i32 6553600, i32 6553600>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_n_u32_sh24(<4 x i32> %a) {
+; CHECK-LABEL: test_vorrq_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vorr.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = or <4 x i32> %a, <i32 1677721600, i32 1677721600, i32 1677721600, i32 1677721600>
+  ret <4 x i32> %0
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = and <8 x i16> %a, <i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101, i16 -101>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vbicq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = and <8 x i16> %a, <i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601, i16 -25601>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -101, i32 -101, i32 -101, i32 -101>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -25601, i32 -25601, i32 -25601, i32 -25601>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -6553601, i32 -6553601, i32 -6553601, i32 -6553601>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vbicq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vbicq_m_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vbict.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = and <4 x i32> %a, <i32 -1677721601, i32 -1677721601, i32 -1677721601, i32 -1677721601>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh0(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u16_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i16 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = or <8 x i16> %a, <i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100, i16 100>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vorrq_m_n_u16_sh8(<8 x i16> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u16_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i16 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = or <8 x i16> %a, <i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600, i16 25600>
+  %3 = select <8 x i1> %1, <8 x i16> %2, <8 x i16> %a
+  ret <8 x i16> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh0(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh0:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x64
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 100, i32 100, i32 100, i32 100>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh8(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh8:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x6400
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 25600, i32 25600, i32 25600, i32 25600>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh16(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x640000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 6553600, i32 6553600, i32 6553600, i32 6553600>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vorrq_m_n_u32_sh24(<4 x i32> %a, i16 zeroext %p) {
+; CHECK-LABEL: test_vorrq_m_n_u32_sh24:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vorrt.i32 q0, #0x64000000
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = or <4 x i32> %a, <i32 1677721600, i32 1677721600, i32 1677721600, i32 1677721600>
+  %3 = select <4 x i1> %1, <4 x i32> %2, <4 x i32> %a
+  ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_n_u16() {
+; CHECK-LABEL: test_vmvnq_n_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn.i16 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  ret <8 x i16> <i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015>
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_n_u32() {
+; CHECK-LABEL: test_vmvnq_n_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmvn.i32 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  ret <4 x i32> <i32 -43521, i32 -43521, i32 -43521, i32 -43521>
+}
+
+define arm_aapcs_vfpcc <8 x i16> @test_vmvnq_m_n_u16(<8 x i16> %inactive, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_n_u16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt.i16 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32 %0)
+  %2 = select <8 x i1> %1, <8 x i16> <i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015, i16 22015>, <8 x i16> %inactive
+  ret <8 x i16> %2
+}
+
+define arm_aapcs_vfpcc <4 x i32> @test_vmvnq_m_n_u32(<4 x i32> %inactive, i16 zeroext %p) {
+; CHECK-LABEL: test_vmvnq_m_n_u32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vmsr p0, r0
+; CHECK-NEXT:    vpst
+; CHECK-NEXT:    vmvnt.i32 q0, #0xaa00
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = zext i16 %p to i32
+  %1 = tail call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %0)
+  %2 = select <4 x i1> %1, <4 x i32> <i32 -43521, i32 -43521, i32 -43521, i32 -43521>, <4 x i32> %inactive
+  ret <4 x i32> %2
+}
+
+declare <8 x i1> @llvm.arm.mve.pred.i2v.v8i1(i32)
+declare <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32)
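
For reference, the immediate-form predicates that the new Sema checks enforce
(after truncating the constant to the lane size, as SemaChecking.cpp now does)
can be summarized by the following self-contained C++ sketch. The helper names
here are illustrative only; the in-tree code relies on the IsShiftedByte()
helper in clang/lib/Sema/SemaChecking.cpp.

#include <cstdint>

// vbicq_n/vorrq_n form: within the lane width, the immediate must be an
// 8-bit value shifted left by a whole number of bytes (zero is accepted).
static bool isShiftedByte(uint64_t V, unsigned LaneBits) {
  // Truncate to the lane size, mirroring Result.getLoBits(ArgBits) above.
  V &= (LaneBits < 64) ? ((uint64_t{1} << LaneBits) - 1) : ~uint64_t{0};
  for (unsigned Shift = 0; Shift < LaneBits; Shift += 8)
    if ((V & ~(uint64_t{0xFF} << Shift)) == 0)
      return true;
  return false;
}

// vmvnq_n form: additionally accepts 0x??FF for any byte value ??, mirroring
// the (Result > 0 && Result < 0x10000 && (Result & 0xFF) == 0xFF) test above.
static bool isShiftedByteOrXXFF(uint64_t V, unsigned LaneBits) {
  V &= (LaneBits < 64) ? ((uint64_t{1} << LaneBits) - 1) : ~uint64_t{0};
  return isShiftedByte(V, LaneBits) ||
         (V > 0 && V < 0x10000 && (V & 0xFF) == 0xFF);
}

For example, isShiftedByte(0xd500, 16) holds (0xd5 shifted left by one byte),
which is why test_vbicq_n_s16 above is accepted, while 0x0101 fails for every
lane size, matching the errors expected in arm-mve-immediates.c.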