Index: include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- include/llvm/IR/IntrinsicsAArch64.td
+++ include/llvm/IR/IntrinsicsAArch64.td
@@ -653,3 +653,10 @@
   def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
       [IntrNoMem]>;
 }
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD from ARMv8 RDMA extension
+// Vector Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
+def int_aarch64_neon_sqrdmlah : AdvSIMD_2IntArg_Intrinsic;
+// Vector Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
+def int_aarch64_neon_sqrdmlsh : AdvSIMD_2IntArg_Intrinsic;
Index: lib/Target/AArch64/AArch64.td
===================================================================
--- lib/Target/AArch64/AArch64.td
+++ lib/Target/AArch64/AArch64.td
@@ -26,6 +26,9 @@
 def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
   "Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
 
+def FeatureRDMA : SubtargetFeature<"rdma", "HasRDMA", "true",
+  "Enable Advanced SIMD instruction extensions", [FeatureNEON]>;
+
 def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
   "Enable cryptographic instructions">;
 
Index: lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- lib/Target/AArch64/AArch64InstrFormats.td
+++ lib/Target/AArch64/AArch64InstrFormats.td
@@ -8627,3 +8627,48 @@
 def : TokenAlias<".S", ".s">;
 def : TokenAlias<".D", ".d">;
 def : TokenAlias<".Q", ".q">;
+
+
+
+//===----------------------------------------------------------------------===//
+// ARMv8 RDMA extension
+let Predicates = [HasRDMA] in {
+
+class BaseSIMDThreeSameVectorExtRDMA<bit Q, bit U, bits<2> size, bits<5> opcode,
+                                     RegisterOperand regtype, string asm, string kind,
+                                     list<dag> pattern>
+  : BaseSIMDThreeSameVector<Q, U, size, opcode, regtype, asm, kind, pattern> {
+  let Inst{21} = 0;
+}
+multiclass SIMDThreeSameVectorExtRDMA<bit U, bits<5> opc, string asm,
+                                      SDPatternOperator OpNode> {
+  def v4i16 : BaseSIMDThreeSameVectorExtRDMA<0, U, 0b01, opc, V64,
+                                             asm, ".4h",
+    [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+  def v8i16 : BaseSIMDThreeSameVectorExtRDMA<1, U, 0b01, opc, V128,
+                                             asm, ".8h",
+    [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+  def v2i32 : BaseSIMDThreeSameVectorExtRDMA<0, U, 0b10, opc, V64,
+                                             asm, ".2s",
+    [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+  def v4i32 : BaseSIMDThreeSameVectorExtRDMA<1, U, 0b10, opc, V128,
+                                             asm, ".4s",
+    [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+}
+class BaseSIMDThreeScalarExtRDMA<bit U, bits<2> size, bits<5> opcode,
+                                 RegisterClass regtype, string asm,
+                                 list<dag> pattern>
+  : BaseSIMDThreeScalar<U, size, opcode, regtype, asm, pattern> {
+  let Inst{21} = 0;
+}
+
+multiclass SIMDThreeScalarHSExtRDMA<bit U, bits<5> opc, string asm,
+                                    SDPatternOperator OpNode> {
+  def v1i32 : BaseSIMDThreeScalarExtRDMA<U, 0b10, opc, FPR32, asm, []>;
+  def v1i16 : BaseSIMDThreeScalarExtRDMA<U, 0b01, opc, FPR16, asm, []>;
+
+  def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
+            (!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;
+}
+} // let Predicates = [HasRDMA]
+//===----- END ARMv8 RDMA extension ---------------------------------------===//
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -22,6 +22,9 @@
                        AssemblerPredicate<"FeatureCrypto", "crypto">;
 def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                        AssemblerPredicate<"FeatureCRC", "crc">;
+def HasRDMA          : Predicate<"Subtarget->hasRDMA()">,
+                       AssemblerPredicate<"FeatureRDMA", "rdma">;
+
 def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
 def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
 def IsCyclone        : Predicate<"Subtarget->isCyclone()">;
@@ -2724,6 +2727,8 @@
 defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
 defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
+defm SQRDMLAH : SIMDThreeSameVectorExtRDMA<1,0b10000,"sqrdmlah",int_aarch64_neon_sqrdmlah>;
+defm SQRDMLSH : SIMDThreeSameVectorExtRDMA<1,0b10001,"sqrdmlsh",int_aarch64_neon_sqrdmlsh>;
 defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
 defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
 defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
@@ -2951,6 +2956,8 @@
 defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
 defm SQDMULH  : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
+defm SQRDMLAH : SIMDThreeScalarHSExtRDMA<1, 0b10000, "sqrdmlah",int_aarch64_neon_sqrdmlah>;
+defm SQRDMLSH : SIMDThreeScalarHSExtRDMA<1, 0b10001, "sqrdmlsh",int_aarch64_neon_sqrdmlsh>;
 defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
 defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
 defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
@@ -4286,6 +4293,10 @@
 defm SQDMULH  : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
+let Predicates = [HasRDMA] in {
+  defm SQRDMLAH : SIMDIndexedHS<1, 0b1101, "sqrdmlah", int_aarch64_neon_sqrdmlah>;
+  defm SQRDMLSH : SIMDIndexedHS<1, 0b1111, "sqrdmlsh", int_aarch64_neon_sqrdmlsh>;
+}
 defm MLA      : SIMDVectorIndexedHSTied<1, 0b0000, "mla",
     TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>;
 defm MLS      : SIMDVectorIndexedHSTied<1, 0b0100, "mls",
Index: lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.h
+++ lib/Target/AArch64/AArch64Subtarget.h
@@ -41,6 +41,7 @@
   bool HasNEON;
   bool HasCrypto;
   bool HasCRC;
+  bool HasRDMA;
 
   // HasZeroCycleRegMove - Has zero-cycle register mov instructions.
   bool HasZeroCycleRegMove;
@@ -99,6 +100,7 @@
   bool hasNEON() const { return HasNEON; }
   bool hasCrypto() const { return HasCrypto; }
   bool hasCRC() const { return HasCRC; }
+  bool hasRDMA() const { return HasRDMA; }
 
   bool isLittleEndian() const { return IsLittle; }
Index: lib/Target/AArch64/AArch64Subtarget.cpp
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.cpp
+++ lib/Target/AArch64/AArch64Subtarget.cpp
@@ -48,6 +48,7 @@
                                    const TargetMachine &TM, bool LittleEndian)
     : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
       HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
+      HasRDMA(false),
       HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
       IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
       InstrInfo(initializeSubtargetDependencies(FS)),
Index: test/CodeGen/AArch64/arm64-neon-2velem-rdma.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-neon-2velem-rdma.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+rdma -fp-contract=fast | FileCheck %s
+
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32>, <2 x i32>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_vqrdmlah_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlah_lane_s16:
+; CHECK: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vqrdmlah2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i16> %vqrdmlah2.i
+}
+
+define <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlahq_lane_s16:
+; CHECK: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  %vqrdmlah2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+  ret <8 x i16> %vqrdmlah2.i
+}
+
+define <2 x i32> @test_vqrdmlah_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlah_lane_s32:
+; CHECK: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vqrdmlah2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i32> %vqrdmlah2.i
+}
+
+define <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlahq_lane_s32:
+; CHECK: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %vqrdmlah2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+  ret <4 x i32> %vqrdmlah2.i
+}
+
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32>, <2 x i32>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_vqrdmlsh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlsh_lane_s16:
+; CHECK: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vqrdmlsh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i16> %vqrdmlsh2.i
+}
+
+define <8 x i16> @test_vqrdmlshq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlshq_lane_s16:
+; CHECK: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  %vqrdmlsh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+  ret <8 x i16> %vqrdmlsh2.i
+}
+
+define <2 x i32> @test_vqrdmlsh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlsh_lane_s32:
+; CHECK: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vqrdmlsh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i32> %vqrdmlsh2.i
+}
+
+define <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlshq_lane_s32:
+; CHECK: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %vqrdmlsh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+  ret <4 x i32> %vqrdmlsh2.i
+}
Index: test/CodeGen/AArch64/arm64-neon-rdma-apple.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-neon-rdma-apple.ll
@@ -0,0 +1,104 @@
+; RUN: llc -asm-verbose=false < %s -march=arm64 -mattr=+rdma -aarch64-neon-syntax=apple | FileCheck %s
+
+
+define <4 x i16> @sqrdmlah_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmlah_4h:
+;CHECK: sqrdmlah.4h
+  %tmp1 = load <4 x i16>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sqrdmlah_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmlah_8h:
+;CHECK: sqrdmlah.8h
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i16>* %B
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+  ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sqrdmlah_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmlah_2s:
+;CHECK: sqrdmlah.2s
+  %tmp1 = load <2 x i32>* %A
+  %tmp2 = load <2 x i32>* %B
+  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sqrdmlah_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmlah_4s:
+;CHECK: sqrdmlah.4s
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = load <4 x i32>* %B
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+  ret <4 x i32> %tmp3
+}
+
+define i32 @sqrdmlah_1s(i32* %A, i32* %B) nounwind {
+;CHECK-LABEL: sqrdmlah_1s:
+;CHECK: sqrdmlah s0, {{s[0-9]+}}, {{s[0-9]+}}
+  %tmp1 = load i32* %A
+  %tmp2 = load i32* %B
+  %tmp3 = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 %tmp1, i32 %tmp2)
+  ret i32 %tmp3
+}
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqrdmlah.i32(i32, i32)
+
+define <4 x i16> @sqrdmlsh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmlsh_4h:
+;CHECK: sqrdmlsh.4h
+  %tmp1 = load <4 x i16>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sqrdmlsh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmlsh_8h:
+;CHECK: sqrdmlsh.8h
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i16>* %B
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+  ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sqrdmlsh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmlsh_2s:
+;CHECK: sqrdmlsh.2s
+  %tmp1 = load <2 x i32>* %A
+  %tmp2 = load <2 x i32>* %B
+  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sqrdmlsh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmlsh_4s:
+;CHECK: sqrdmlsh.4s
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = load <4 x i32>* %B
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+  ret <4 x i32> %tmp3
+}
+
+define i32 @sqrdmlsh_1s(i32* %A, i32* %B) nounwind {
+;CHECK-LABEL: sqrdmlsh_1s:
+;CHECK: sqrdmlsh s0, {{s[0-9]+}}, {{s[0-9]+}}
+  %tmp1 = load i32* %A
+  %tmp2 = load i32* %B
+  %tmp3 = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 %tmp1, i32 %tmp2)
+  ret i32 %tmp3
+}
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32, i32) nounwind readnone
Index: test/CodeGen/AArch64/arm64-neon-rdma.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-neon-rdma.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+rdma | FileCheck %s
+; arm64 has its own copy of this because of the intrinsics
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i16> @test_sqrdmlah_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v4i16:
+  %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqrdmlah v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %prod
+}
+
+define <8 x i16> @test_sqrdmlah_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v8i16:
+  %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqrdmlah v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %prod
+}
+
+define <2 x i32> @test_sqrdmlah_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v2i32:
+  %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqrdmlah v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %prod
+}
+
+define <4 x i32> @test_sqrdmlah_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v4i32:
+  %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqrdmlah v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %prod
+}
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i16> @test_sqrdmlsh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v4i16:
+  %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqrdmlsh v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %prod
+}
+
+define <8 x i16> @test_sqrdmlsh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v8i16:
+  %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqrdmlsh v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %prod
+}
+
+define <2 x i32> @test_sqrdmlsh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v2i32:
+  %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqrdmlsh v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %prod
+}
+
+define <4 x i32> @test_sqrdmlsh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v4i32:
+  %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqrdmlsh v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %prod
+}
Index: test/MC/AArch64/armv8-extension-rdma.s
===================================================================
--- /dev/null
+++ test/MC/AArch64/armv8-extension-rdma.s
@@ -0,0 +1,154 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+rdma -show-encoding < %s 2> %t | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+  .text
+
+  //AdvSIMD vector
+  sqrdmlah v0.4h, v1.4h, v2.4h
+  sqrdmlsh v0.4h, v1.4h, v2.4h
+  sqrdmlah v0.2s, v1.2s, v2.2s
+  sqrdmlsh v0.2s, v1.2s, v2.2s
+  sqrdmlah v0.4s, v1.4s, v2.4s
+  sqrdmlsh v0.4s, v1.4s, v2.4s
+  sqrdmlah v0.8h, v1.8h, v2.8h
+  sqrdmlsh v0.8h, v1.8h, v2.8h
+// CHECK: sqrdmlah v0.4h, v1.4h, v2.4h // encoding: [0x20,0x84,0x42,0x2e]
+// CHECK: sqrdmlsh v0.4h, v1.4h, v2.4h // encoding: [0x20,0x8c,0x42,0x2e]
+// CHECK: sqrdmlah v0.2s, v1.2s, v2.2s // encoding: [0x20,0x84,0x82,0x2e]
+// CHECK: sqrdmlsh v0.2s, v1.2s, v2.2s // encoding: [0x20,0x8c,0x82,0x2e]
+// CHECK: sqrdmlah v0.4s, v1.4s, v2.4s // encoding: [0x20,0x84,0x82,0x6e]
+// CHECK: sqrdmlsh v0.4s, v1.4s, v2.4s // encoding: [0x20,0x8c,0x82,0x6e]
+// CHECK: sqrdmlah v0.8h, v1.8h, v2.8h // encoding: [0x20,0x84,0x42,0x6e]
+// CHECK: sqrdmlsh v0.8h, v1.8h, v2.8h // encoding: [0x20,0x8c,0x42,0x6e]
+
+  sqrdmlah v0.2h, v1.2h, v2.2h
+  sqrdmlsh v0.2h, v1.2h, v2.2h
+  sqrdmlah v0.8s, v1.8s, v2.8s
+  sqrdmlsh v0.8s, v1.8s, v2.8s
+  sqrdmlah v0.2s, v1.4h, v2.8h
+  sqrdmlsh v0.4s, v1.8h, v2.2s
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.2s, v1.4h, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.4s, v1.8h, v2.2s
+// CHECK-ERROR: ^
+
+  //AdvSIMD scalar
+  sqrdmlah h0, h1, h2
+  sqrdmlsh h0, h1, h2
+  sqrdmlah s0, s1, s2
+  sqrdmlsh s0, s1, s2
+// CHECK: sqrdmlah h0, h1, h2 // encoding: [0x20,0x84,0x42,0x7e]
+// CHECK: sqrdmlsh h0, h1, h2 // encoding: [0x20,0x8c,0x42,0x7e]
+// CHECK: sqrdmlah s0, s1, s2 // encoding: [0x20,0x84,0x82,0x7e]
+// CHECK: sqrdmlsh s0, s1, s2 // encoding: [0x20,0x8c,0x82,0x7e]
+
+  //AdvSIMD vector by-element
+  sqrdmlah v0.4h, v1.4h, v2.h[3]
+  sqrdmlsh v0.4h, v1.4h, v2.h[3]
+  sqrdmlah v0.2s, v1.2s, v2.s[1]
+  sqrdmlsh v0.2s, v1.2s, v2.s[1]
+  sqrdmlah v0.8h, v1.8h, v2.h[3]
+  sqrdmlsh v0.8h, v1.8h, v2.h[3]
+  sqrdmlah v0.4s, v1.4s, v2.s[3]
+  sqrdmlsh v0.4s, v1.4s, v2.s[3]
+// CHECK: sqrdmlah v0.4h, v1.4h, v2.h[3] // encoding: [0x20,0xd0,0x72,0x2f]
+// CHECK: sqrdmlsh v0.4h, v1.4h, v2.h[3] // encoding: [0x20,0xf0,0x72,0x2f]
+// CHECK: sqrdmlah v0.2s, v1.2s, v2.s[1] // encoding: [0x20,0xd0,0xa2,0x2f]
+// CHECK: sqrdmlsh v0.2s, v1.2s, v2.s[1] // encoding: [0x20,0xf0,0xa2,0x2f]
+// CHECK: sqrdmlah v0.8h, v1.8h, v2.h[3] // encoding: [0x20,0xd0,0x72,0x6f]
+// CHECK: sqrdmlsh v0.8h, v1.8h, v2.h[3] // encoding: [0x20,0xf0,0x72,0x6f]
+// CHECK: sqrdmlah v0.4s, v1.4s, v2.s[3] // encoding: [0x20,0xd8,0xa2,0x6f]
+// CHECK: sqrdmlsh v0.4s, v1.4s, v2.s[3] // encoding: [0x20,0xf8,0xa2,0x6f]
+
+  sqrdmlah v0.4s, v1.2s, v2.s[1]
+  sqrdmlsh v0.2s, v1.2d, v2.s[1]
+  sqrdmlah v0.8h, v1.8h, v2.s[3]
+  sqrdmlsh v0.8h, v1.8h, v2.h[8]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.4s, v1.2s, v2.s[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.2s, v1.2d, v2.s[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.8h, v1.8h, v2.s[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: vector lane must be an integer in range [0, 7].
+// CHECK-ERROR: sqrdmlsh v0.8h, v1.8h, v2.h[8]
+// CHECK-ERROR: ^
+
+  //AdvSIMD scalar by-element
+  sqrdmlah h0, h1, v2.h[3]
+  sqrdmlsh h0, h1, v2.h[3]
+  sqrdmlah s0, s1, v2.s[3]
+  sqrdmlsh s0, s1, v2.s[3]
+// CHECK: sqrdmlah h0, h1, v2.h[3] // encoding: [0x20,0xd0,0x72,0x7f]
+// CHECK: sqrdmlsh h0, h1, v2.h[3] // encoding: [0x20,0xf0,0x72,0x7f]
+// CHECK: sqrdmlah s0, s1, v2.s[3] // encoding: [0x20,0xd8,0xa2,0x7f]
+// CHECK: sqrdmlsh s0, s1, v2.s[3] // encoding: [0x20,0xf8,0xa2,0x7f]
+
+  sqrdmlah b0, h1, v2.h[3]
+  sqrdmlah s0, d1, v2.s[3]
+  sqrdmlsh h0, h1, v2.s[3]
+  sqrdmlsh s0, s1, v2.s[4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah b0, h1, v2.h[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah s0, d1, v2.s[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh h0, h1, v2.s[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: vector lane must be an integer in range [0, 3].
+// CHECK-ERROR: sqrdmlsh s0, s1, v2.s[4]
+// CHECK-ERROR: ^
Index: test/MC/Disassembler/AArch64/armv8-extension-rdma.txt
===================================================================
--- /dev/null
+++ test/MC/Disassembler/AArch64/armv8-extension-rdma.txt
@@ -0,0 +1,53 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+rdma --disassemble < %s | FileCheck %s
+
+0x20,0x84,0x42,0x2e
+0x20,0x8c,0x42,0x2e
+0x20,0x84,0x82,0x2e
+0x20,0x8c,0x82,0x2e
+0x20,0x84,0x82,0x6e
+0x20,0x8c,0x82,0x6e
+0x20,0x84,0x42,0x6e
+0x20,0x8c,0x42,0x6e
+# CHECK: sqrdmlah v0.4h, v1.4h, v2.4h
+# CHECK: sqrdmlsh v0.4h, v1.4h, v2.4h
+# CHECK: sqrdmlah v0.2s, v1.2s, v2.2s
+# CHECK: sqrdmlsh v0.2s, v1.2s, v2.2s
+# CHECK: sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK: sqrdmlsh v0.4s, v1.4s, v2.4s
+# CHECK: sqrdmlah v0.8h, v1.8h, v2.8h
+# CHECK: sqrdmlsh v0.8h, v1.8h, v2.8h
+
+0x20,0x84,0x42,0x7e
+0x20,0x8c,0x42,0x7e
+0x20,0x84,0x82,0x7e
+0x20,0x8c,0x82,0x7e
+# CHECK: sqrdmlah h0, h1, h2
+# CHECK: sqrdmlsh h0, h1, h2
+# CHECK: sqrdmlah s0, s1, s2
+# CHECK: sqrdmlsh s0, s1, s2
+
+0x20,0xd0,0x72,0x2f
+0x20,0xf0,0x72,0x2f
+0x20,0xd0,0xa2,0x2f
+0x20,0xf0,0xa2,0x2f
+0x20,0xd0,0x72,0x6f
+0x20,0xf0,0x72,0x6f
+0x20,0xd8,0xa2,0x6f
+0x20,0xf8,0xa2,0x6f
+# CHECK: sqrdmlah v0.4h, v1.4h, v2.h[3]
+# CHECK: sqrdmlsh v0.4h, v1.4h, v2.h[3]
+# CHECK: sqrdmlah v0.2s, v1.2s, v2.s[1]
+# CHECK: sqrdmlsh v0.2s, v1.2s, v2.s[1]
+# CHECK: sqrdmlah v0.8h, v1.8h, v2.h[3]
+# CHECK: sqrdmlsh v0.8h, v1.8h, v2.h[3]
+# CHECK: sqrdmlah v0.4s, v1.4s, v2.s[3]
+# CHECK: sqrdmlsh v0.4s, v1.4s, v2.s[3]
+
+0x20,0xd0,0x72,0x7f
+0x20,0xf0,0x72,0x7f
+0x20,0xd8,0xa2,0x7f
+0x20,0xf8,0xa2,0x7f
+# CHECK: sqrdmlah h0, h1, v2.h[3]
+# CHECK: sqrdmlsh h0, h1, v2.h[3]
+# CHECK: sqrdmlah s0, s1, v2.s[3]
+# CHECK: sqrdmlsh s0, s1, v2.s[3]