Index: include/llvm/IR/IntrinsicsAArch64.td
===================================================================
--- include/llvm/IR/IntrinsicsAArch64.td
+++ include/llvm/IR/IntrinsicsAArch64.td
@@ -653,3 +653,10 @@
   def int_aarch64_crc32cx : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i64_ty],
       [IntrNoMem]>;
 }
+
+//===----------------------------------------------------------------------===//
+// Advanced SIMD from ARMv8 RDMA extension
+// Vector Signed Saturating Rounding Doubling Multiply Accumulate Returning High Half
+def int_aarch64_neon_sqrdmlah : AdvSIMD_2IntArg_Intrinsic;
+// Vector Signed Saturating Rounding Doubling Multiply Subtract Returning High Half
+def int_aarch64_neon_sqrdmlsh : AdvSIMD_2IntArg_Intrinsic;
Index: lib/Target/AArch64/AArch64.td
===================================================================
--- lib/Target/AArch64/AArch64.td
+++ lib/Target/AArch64/AArch64.td
@@ -26,6 +26,9 @@
 def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true",
   "Enable Advanced SIMD instructions", [FeatureFPARMv8]>;
 
+def FeatureRDMA : SubtargetFeature<"rdma", "HasRDMA", "true",
+  "Enable Advanced SIMD instruction extensions", [FeatureNEON]>;
+
 def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true",
   "Enable cryptographic instructions">;
 
Index: lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- lib/Target/AArch64/AArch64InstrFormats.td
+++ lib/Target/AArch64/AArch64InstrFormats.td
@@ -8627,3 +8627,48 @@
 def : TokenAlias<".S", ".s">;
 def : TokenAlias<".D", ".d">;
 def : TokenAlias<".Q", ".q">;
+
+
+
+//===----------------------------------------------------------------------===//
+// ARMv8 RDMA extension
+let Predicates = [HasRDMA] in {
+
+class BaseSIMDThreeSameVectorExtRDMA<bit Q, bit U, bits<2> size, bits<5> opcode,
+                                     RegisterOperand regtype, string asm, string kind,
+                                     list<dag> pattern>
+  : BaseSIMDThreeSameVector<Q, U, size, opcode, regtype, asm, kind, pattern> {
+  let Inst{21} = 0;
+}
+multiclass SIMDThreeSameVectorExtRDMA<bit U, bits<5> opc, string asm,
+                                      SDPatternOperator OpNode> {
+  def v4i16 : BaseSIMDThreeSameVectorExtRDMA<0, U, 0b01, opc, V64,
+                                             asm, ".4h",
+    [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>;
+  def v8i16 : BaseSIMDThreeSameVectorExtRDMA<1, U, 0b01, opc, V128,
+                                             asm, ".8h",
+    [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>;
+  def v2i32 : BaseSIMDThreeSameVectorExtRDMA<0, U, 0b10, opc, V64,
+                                             asm, ".2s",
+    [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>;
+  def v4i32 : BaseSIMDThreeSameVectorExtRDMA<1, U, 0b10, opc, V128,
+                                             asm, ".4s",
+    [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>;
+}
+class BaseSIMDThreeScalarExtRDMA<bit U, bits<2> size, bits<5> opcode,
+                                 RegisterClass regtype, string asm,
+                                 list<dag> pattern>
+  : BaseSIMDThreeScalar<U, size, opcode, regtype, asm, pattern> {
+  let Inst{21} = 0;
+}
+
+multiclass SIMDThreeScalarHSExtRDMA<bit U, bits<5> opc, string asm,
+                                    SDPatternOperator OpNode> {
+  def v1i32 : BaseSIMDThreeScalarExtRDMA<U, 0b10, opc, FPR32, asm, []>;
+  def v1i16 : BaseSIMDThreeScalarExtRDMA<U, 0b01, opc, FPR16, asm, []>;
+
+  def : Pat<(i32 (OpNode (i32 FPR32:$Rn), (i32 FPR32:$Rm))),
+            (!cast<Instruction>(NAME#"v1i32") FPR32:$Rn, FPR32:$Rm)>;
+}
+} // let Predicates = [HasRDMA]
+//===----- END ARMv8 RDMA extension ---------------------------------------===//
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -22,6 +22,9 @@
                        AssemblerPredicate<"FeatureCrypto", "crypto">;
 def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                        AssemblerPredicate<"FeatureCRC", "crc">;
+def HasRDMA          : Predicate<"Subtarget->hasRDMA()">,
+                       AssemblerPredicate<"FeatureRDMA", "rdma">;
+
 def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
 def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
 def IsCyclone        : Predicate<"Subtarget->isCyclone()">;
@@ -2724,6 +2727,8 @@
 defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
 defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
+defm SQRDMLAH : SIMDThreeSameVectorExtRDMA<1,0b10000,"sqrdmlah",int_aarch64_neon_sqrdmlah>;
+defm SQRDMLSH : SIMDThreeSameVectorExtRDMA<1,0b10001,"sqrdmlsh",int_aarch64_neon_sqrdmlsh>;
 defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
 defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
 defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
@@ -2951,6 +2956,8 @@
 defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
 defm SQDMULH  : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
+defm SQRDMLAH : SIMDThreeScalarHSExtRDMA<1, 0b10000, "sqrdmlah",int_aarch64_neon_sqrdmlah>;
+defm SQRDMLSH : SIMDThreeScalarHSExtRDMA<1, 0b10001, "sqrdmlsh",int_aarch64_neon_sqrdmlsh>;
 defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl",int_aarch64_neon_sqrshl>;
 defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
 defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
@@ -4286,6 +4293,10 @@
 defm SQDMULH  : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
+let Predicates = [HasRDMA] in {
+  defm SQRDMLAH : SIMDIndexedHS<1, 0b1101, "sqrdmlah", int_aarch64_neon_sqrdmlah>;
+  defm SQRDMLSH : SIMDIndexedHS<1, 0b1111, "sqrdmlsh", int_aarch64_neon_sqrdmlsh>;
+}
 defm MLA      : SIMDVectorIndexedHSTied<1, 0b0000, "mla",
     TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))>>;
 defm MLS      : SIMDVectorIndexedHSTied<1, 0b0100, "mls",
Index: lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.h
+++ lib/Target/AArch64/AArch64Subtarget.h
@@ -41,6 +41,7 @@
   bool HasNEON;
   bool HasCrypto;
   bool HasCRC;
+  bool HasRDMA;
 
   // HasZeroCycleRegMove - Has zero-cycle register mov instructions.
   bool HasZeroCycleRegMove;
@@ -99,6 +100,7 @@
   bool hasNEON() const { return HasNEON; }
   bool hasCrypto() const { return HasCrypto; }
   bool hasCRC() const { return HasCRC; }
+  bool hasRDMA() const { return HasRDMA; }
 
   bool isLittleEndian() const { return IsLittle; }
Index: lib/Target/AArch64/AArch64Subtarget.cpp
===================================================================
--- lib/Target/AArch64/AArch64Subtarget.cpp
+++ lib/Target/AArch64/AArch64Subtarget.cpp
@@ -48,6 +48,7 @@
                                    const TargetMachine &TM, bool LittleEndian)
     : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others),
       HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false),
+      HasRDMA(false),
       HasZeroCycleRegMove(false), HasZeroCycleZeroing(false),
       IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
       InstrInfo(initializeSubtargetDependencies(FS)),
Index: test/CodeGen/AArch64/arm64-neon-2velem-rdma.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-neon-2velem-rdma.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+rdma -fp-contract=fast | FileCheck %s
+
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32>, <2 x i32>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_vqrdmlah_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlah_lane_s16:
+; CHECK: sqrdmlah {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vqrdmlah2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i16> %vqrdmlah2.i
+}
+
+define <8 x i16> @test_vqrdmlahq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlahq_lane_s16:
+; CHECK: sqrdmlah {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  %vqrdmlah2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+  ret <8 x i16> %vqrdmlah2.i
+}
+
+define <2 x i32> @test_vqrdmlah_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlah_lane_s32:
+; CHECK: sqrdmlah {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vqrdmlah2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i32> %vqrdmlah2.i
+}
+
+define <4 x i32> @test_vqrdmlahq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlahq_lane_s32:
+; CHECK: sqrdmlah {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %vqrdmlah2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+  ret <4 x i32> %vqrdmlah2.i
+}
+
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32>, <4 x i32>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32>, <2 x i32>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16>, <8 x i16>)
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16>, <4 x i16>)
+
+define <4 x i16> @test_vqrdmlsh_lane_s16(<4 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlsh_lane_s16:
+; CHECK: sqrdmlsh {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
+  %vqrdmlsh2.i = tail call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> %a, <4 x i16> %shuffle)
+  ret <4 x i16> %vqrdmlsh2.i
+}
+
+define <8 x i16> @test_vqrdmlshq_lane_s16(<8 x i16> %a, <4 x i16> %v) {
+; CHECK-LABEL: test_vqrdmlshq_lane_s16:
+; CHECK: sqrdmlsh {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.h[3]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <4 x i16> %v, <4 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+  %vqrdmlsh2.i = tail call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> %a, <8 x i16> %shuffle)
+  ret <8 x i16> %vqrdmlsh2.i
+}
+
+define <2 x i32> @test_vqrdmlsh_lane_s32(<2 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlsh_lane_s32:
+; CHECK: sqrdmlsh {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <2 x i32> <i32 1, i32 1>
+  %vqrdmlsh2.i = tail call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> %a, <2 x i32> %shuffle)
+  ret <2 x i32> %vqrdmlsh2.i
+}
+
+define <4 x i32> @test_vqrdmlshq_lane_s32(<4 x i32> %a, <2 x i32> %v) {
+; CHECK-LABEL: test_vqrdmlshq_lane_s32:
+; CHECK: sqrdmlsh {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.s[1]
+; CHECK-NEXT: ret
+entry:
+  %shuffle = shufflevector <2 x i32> %v, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
+  %vqrdmlsh2.i = tail call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> %a, <4 x i32> %shuffle)
+  ret <4 x i32> %vqrdmlsh2.i
+}
Index: test/CodeGen/AArch64/arm64-neon-rdma-apple.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-neon-rdma-apple.ll
@@ -0,0 +1,104 @@
+; RUN: llc -asm-verbose=false < %s -march=arm64 -mattr=+rdma -aarch64-neon-syntax=apple | FileCheck %s
+
+
+define <4 x i16> @sqrdmlah_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmlah_4h:
+;CHECK: sqrdmlah.4h
+  %tmp1 = load <4 x i16>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sqrdmlah_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmlah_8h:
+;CHECK: sqrdmlah.8h
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i16>* %B
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+  ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sqrdmlah_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmlah_2s:
+;CHECK: sqrdmlah.2s
+  %tmp1 = load <2 x i32>* %A
+  %tmp2 = load <2 x i32>* %B
+  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sqrdmlah_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmlah_4s:
+;CHECK: sqrdmlah.4s
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = load <4 x i32>* %B
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+  ret <4 x i32> %tmp3
+}
+
+define i32 @sqrdmlah_1s(i32* %A, i32* %B) nounwind {
+;CHECK-LABEL: sqrdmlah_1s:
+;CHECK: sqrdmlah s0, {{s[0-9]+}}, {{s[0-9]+}}
+  %tmp1 = load i32* %A
+  %tmp2 = load i32* %B
+  %tmp3 = call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 %tmp1, i32 %tmp2)
+  ret i32 %tmp3
+}
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqrdmlah.i32(i32, i32)
+
+define <4 x i16> @sqrdmlsh_4h(<4 x i16>* %A, <4 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmlsh_4h:
+;CHECK: sqrdmlsh.4h
+  %tmp1 = load <4 x i16>* %A
+  %tmp2 = load <4 x i16>* %B
+  %tmp3 = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+  ret <4 x i16> %tmp3
+}
+
+define <8 x i16> @sqrdmlsh_8h(<8 x i16>* %A, <8 x i16>* %B) nounwind {
+;CHECK-LABEL: sqrdmlsh_8h:
+;CHECK: sqrdmlsh.8h
+  %tmp1 = load <8 x i16>* %A
+  %tmp2 = load <8 x i16>* %B
+  %tmp3 = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+  ret <8 x i16> %tmp3
+}
+
+define <2 x i32> @sqrdmlsh_2s(<2 x i32>* %A, <2 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmlsh_2s:
+;CHECK: sqrdmlsh.2s
+  %tmp1 = load <2 x i32>* %A
+  %tmp2 = load <2 x i32>* %B
+  %tmp3 = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+  ret <2 x i32> %tmp3
+}
+
+define <4 x i32> @sqrdmlsh_4s(<4 x i32>* %A, <4 x i32>* %B) nounwind {
+;CHECK-LABEL: sqrdmlsh_4s:
+;CHECK: sqrdmlsh.4s
+  %tmp1 = load <4 x i32>* %A
+  %tmp2 = load <4 x i32>* %B
+  %tmp3 = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+  ret <4 x i32> %tmp3
+}
+
+define i32 @sqrdmlsh_1s(i32* %A, i32* %B) nounwind {
+;CHECK-LABEL: sqrdmlsh_1s:
+;CHECK: sqrdmlsh s0, {{s[0-9]+}}, {{s[0-9]+}}
+  %tmp1 = load i32* %A
+  %tmp2 = load i32* %B
+  %tmp3 = call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 %tmp1, i32 %tmp2)
+  ret i32 %tmp3
+}
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32, i32) nounwind readnone
Index: test/CodeGen/AArch64/arm64-neon-rdma.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-neon-rdma.ll
@@ -0,0 +1,68 @@
+; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -mattr=+rdma | FileCheck %s
+; arm64 has its own copy of this because of the intrinsics
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i16> @test_sqrdmlah_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v4i16:
+  %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmlah.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqrdmlah v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %prod
+}
+
+define <8 x i16> @test_sqrdmlah_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v8i16:
+  %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmlah.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqrdmlah v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %prod
+}
+
+define <2 x i32> @test_sqrdmlah_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v2i32:
+  %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmlah.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqrdmlah v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %prod
+}
+
+define <4 x i32> @test_sqrdmlah_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlah_v4i32:
+  %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmlah.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqrdmlah v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %prod
+}
+
+declare <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16>, <8 x i16>)
+declare <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32>, <2 x i32>)
+declare <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32>, <4 x i32>)
+
+define <4 x i16> @test_sqrdmlsh_v4i16(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v4i16:
+  %prod = call <4 x i16> @llvm.aarch64.neon.sqrdmlsh.v4i16(<4 x i16> %lhs, <4 x i16> %rhs)
+; CHECK: sqrdmlsh v0.4h, v0.4h, v1.4h
+  ret <4 x i16> %prod
+}
+
+define <8 x i16> @test_sqrdmlsh_v8i16(<8 x i16> %lhs, <8 x i16> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v8i16:
+  %prod = call <8 x i16> @llvm.aarch64.neon.sqrdmlsh.v8i16(<8 x i16> %lhs, <8 x i16> %rhs)
+; CHECK: sqrdmlsh v0.8h, v0.8h, v1.8h
+  ret <8 x i16> %prod
+}
+
+define <2 x i32> @test_sqrdmlsh_v2i32(<2 x i32> %lhs, <2 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v2i32:
+  %prod = call <2 x i32> @llvm.aarch64.neon.sqrdmlsh.v2i32(<2 x i32> %lhs, <2 x i32> %rhs)
+; CHECK: sqrdmlsh v0.2s, v0.2s, v1.2s
+  ret <2 x i32> %prod
+}
+
+define <4 x i32> @test_sqrdmlsh_v4i32(<4 x i32> %lhs, <4 x i32> %rhs) {
+; CHECK-LABEL: test_sqrdmlsh_v4i32:
+  %prod = call <4 x i32> @llvm.aarch64.neon.sqrdmlsh.v4i32(<4 x i32> %lhs, <4 x i32> %rhs)
+; CHECK: sqrdmlsh v0.4s, v0.4s, v1.4s
+  ret <4 x i32> %prod
+}
Index: test/MC/AArch64/armv8-extension-rdma.s
===================================================================
--- /dev/null
+++ test/MC/AArch64/armv8-extension-rdma.s
@@ -0,0 +1,154 @@
+// RUN: not llvm-mc -triple aarch64-none-linux-gnu -mattr=+rdma -show-encoding < %s 2> %t | FileCheck %s
+// RUN: FileCheck --check-prefix=CHECK-ERROR < %t %s
+  .text
+
+  //AdvSIMD vector
+  sqrdmlah v0.4h, v1.4h, v2.4h
+  sqrdmlsh v0.4h, v1.4h, v2.4h
+  sqrdmlah v0.2s, v1.2s, v2.2s
+  sqrdmlsh v0.2s, v1.2s, v2.2s
+  sqrdmlah v0.4s, v1.4s, v2.4s
+  sqrdmlsh v0.4s, v1.4s, v2.4s
+  sqrdmlah v0.8h, v1.8h, v2.8h
+  sqrdmlsh v0.8h, v1.8h, v2.8h
+// CHECK: sqrdmlah v0.4h, v1.4h, v2.4h // encoding: [0x20,0x84,0x42,0x2e]
+// CHECK: sqrdmlsh v0.4h, v1.4h, v2.4h // encoding: [0x20,0x8c,0x42,0x2e]
+// CHECK: sqrdmlah v0.2s, v1.2s, v2.2s // encoding: [0x20,0x84,0x82,0x2e]
+// CHECK: sqrdmlsh v0.2s, v1.2s, v2.2s // encoding: [0x20,0x8c,0x82,0x2e]
+// CHECK: sqrdmlah v0.4s, v1.4s, v2.4s // encoding: [0x20,0x84,0x82,0x6e]
+// CHECK: sqrdmlsh v0.4s, v1.4s, v2.4s // encoding: [0x20,0x8c,0x82,0x6e]
+// CHECK: sqrdmlah v0.8h, v1.8h, v2.8h // encoding: [0x20,0x84,0x42,0x6e]
+// CHECK: sqrdmlsh v0.8h, v1.8h, v2.8h // encoding: [0x20,0x8c,0x42,0x6e]
+
+  sqrdmlah v0.2h, v1.2h, v2.2h
+  sqrdmlsh v0.2h, v1.2h, v2.2h
+  sqrdmlah v0.8s, v1.8s, v2.8s
+  sqrdmlsh v0.8s, v1.8s, v2.8s
+  sqrdmlah v0.2s, v1.4h, v2.8h
+  sqrdmlsh v0.4s, v1.8h, v2.2s
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.2h, v1.2h, v2.2h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid vector kind qualifier
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.8s, v1.8s, v2.8s
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.2s, v1.4h, v2.8h
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.4s, v1.8h, v2.2s
+// CHECK-ERROR: ^
+
+  //AdvSIMD scalar
+  sqrdmlah h0, h1, h2
+  sqrdmlsh h0, h1, h2
+  sqrdmlah s0, s1, s2
+  sqrdmlsh s0, s1, s2
+// CHECK: sqrdmlah h0, h1, h2 // encoding: [0x20,0x84,0x42,0x7e]
+// CHECK: sqrdmlsh h0, h1, h2 // encoding: [0x20,0x8c,0x42,0x7e]
+// CHECK: sqrdmlah s0, s1, s2 // encoding: [0x20,0x84,0x82,0x7e]
+// CHECK: sqrdmlsh s0, s1, s2 // encoding: [0x20,0x8c,0x82,0x7e]
+
+  //AdvSIMD vector by-element
+  sqrdmlah v0.4h, v1.4h, v2.h[3]
+  sqrdmlsh v0.4h, v1.4h, v2.h[3]
+  sqrdmlah v0.2s, v1.2s, v2.s[1]
+  sqrdmlsh v0.2s, v1.2s, v2.s[1]
+  sqrdmlah v0.8h, v1.8h, v2.h[3]
+  sqrdmlsh v0.8h, v1.8h, v2.h[3]
+  sqrdmlah v0.4s, v1.4s, v2.s[3]
+  sqrdmlsh v0.4s, v1.4s, v2.s[3]
+// CHECK: sqrdmlah v0.4h, v1.4h, v2.h[3] // encoding: [0x20,0xd0,0x72,0x2f]
+// CHECK: sqrdmlsh v0.4h, v1.4h, v2.h[3] // encoding: [0x20,0xf0,0x72,0x2f]
+// CHECK: sqrdmlah v0.2s, v1.2s, v2.s[1] // encoding: [0x20,0xd0,0xa2,0x2f]
+// CHECK: sqrdmlsh v0.2s, v1.2s, v2.s[1] // encoding: [0x20,0xf0,0xa2,0x2f]
+// CHECK: sqrdmlah v0.8h, v1.8h, v2.h[3] // encoding: [0x20,0xd0,0x72,0x6f]
+// CHECK: sqrdmlsh v0.8h, v1.8h, v2.h[3] // encoding: [0x20,0xf0,0x72,0x6f]
+// CHECK: sqrdmlah v0.4s, v1.4s, v2.s[3] // encoding: [0x20,0xd8,0xa2,0x6f]
+// CHECK: sqrdmlsh v0.4s, v1.4s, v2.s[3] // encoding: [0x20,0xf8,0xa2,0x6f]
+
+  sqrdmlah v0.4s, v1.2s, v2.s[1]
+  sqrdmlsh v0.2s, v1.2d, v2.s[1]
+  sqrdmlah v0.8h, v1.8h, v2.s[3]
+  sqrdmlsh v0.8h, v1.8h, v2.h[8]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.4s, v1.2s, v2.s[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh v0.2s, v1.2d, v2.s[1]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah v0.8h, v1.8h, v2.s[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: vector lane must be an integer in range [0, 7].
+// CHECK-ERROR: sqrdmlsh v0.8h, v1.8h, v2.h[8]
+// CHECK-ERROR: ^
+
+  //AdvSIMD scalar by-element
+  sqrdmlah h0, h1, v2.h[3]
+  sqrdmlsh h0, h1, v2.h[3]
+  sqrdmlah s0, s1, v2.s[3]
+  sqrdmlsh s0, s1, v2.s[3]
+// CHECK: sqrdmlah h0, h1, v2.h[3] // encoding: [0x20,0xd0,0x72,0x7f]
+// CHECK: sqrdmlsh h0, h1, v2.h[3] // encoding: [0x20,0xf0,0x72,0x7f]
+// CHECK: sqrdmlah s0, s1, v2.s[3] // encoding: [0x20,0xd8,0xa2,0x7f]
+// CHECK: sqrdmlsh s0, s1, v2.s[3] // encoding: [0x20,0xf8,0xa2,0x7f]
+
+  sqrdmlah b0, h1, v2.h[3]
+  sqrdmlah s0, d1, v2.s[3]
+  sqrdmlsh h0, h1, v2.s[3]
+  sqrdmlsh s0, s1, v2.s[4]
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah b0, h1, v2.h[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlah s0, d1, v2.s[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: invalid operand for instruction
+// CHECK-ERROR: sqrdmlsh h0, h1, v2.s[3]
+// CHECK-ERROR: ^
+// CHECK-ERROR: error: vector lane must be an integer in range [0, 3].
+// CHECK-ERROR: sqrdmlsh s0, s1, v2.s[4]
+// CHECK-ERROR: ^
Index: test/MC/Disassembler/AArch64/armv8-extension-rdma.txt
===================================================================
--- /dev/null
+++ test/MC/Disassembler/AArch64/armv8-extension-rdma.txt
@@ -0,0 +1,53 @@
+# RUN: llvm-mc -triple aarch64-none-linux-gnu -mattr=+rdma --disassemble < %s | FileCheck %s
+
+0x20,0x84,0x42,0x2e
+0x20,0x8c,0x42,0x2e
+0x20,0x84,0x82,0x2e
+0x20,0x8c,0x82,0x2e
+0x20,0x84,0x82,0x6e
+0x20,0x8c,0x82,0x6e
+0x20,0x84,0x42,0x6e
+0x20,0x8c,0x42,0x6e
+# CHECK: sqrdmlah v0.4h, v1.4h, v2.4h
+# CHECK: sqrdmlsh v0.4h, v1.4h, v2.4h
+# CHECK: sqrdmlah v0.2s, v1.2s, v2.2s
+# CHECK: sqrdmlsh v0.2s, v1.2s, v2.2s
+# CHECK: sqrdmlah v0.4s, v1.4s, v2.4s
+# CHECK: sqrdmlsh v0.4s, v1.4s, v2.4s
+# CHECK: sqrdmlah v0.8h, v1.8h, v2.8h
+# CHECK: sqrdmlsh v0.8h, v1.8h, v2.8h
+
+0x20,0x84,0x42,0x7e
+0x20,0x8c,0x42,0x7e
+0x20,0x84,0x82,0x7e
+0x20,0x8c,0x82,0x7e
+# CHECK: sqrdmlah h0, h1, h2
+# CHECK: sqrdmlsh h0, h1, h2
+# CHECK: sqrdmlah s0, s1, s2
+# CHECK: sqrdmlsh s0, s1, s2
+
+0x20,0xd0,0x72,0x2f
+0x20,0xf0,0x72,0x2f
+0x20,0xd0,0xa2,0x2f
+0x20,0xf0,0xa2,0x2f
+0x20,0xd0,0x72,0x6f
+0x20,0xf0,0x72,0x6f
+0x20,0xd8,0xa2,0x6f
+0x20,0xf8,0xa2,0x6f
+# CHECK: sqrdmlah v0.4h, v1.4h, v2.h[3]
+# CHECK: sqrdmlsh v0.4h, v1.4h, v2.h[3]
+# CHECK: sqrdmlah v0.2s, v1.2s, v2.s[1]
+# CHECK: sqrdmlsh v0.2s, v1.2s, v2.s[1]
+# CHECK: sqrdmlah v0.8h, v1.8h, v2.h[3]
+# CHECK: sqrdmlsh v0.8h, v1.8h, v2.h[3]
+# CHECK: sqrdmlah v0.4s, v1.4s, v2.s[3]
+# CHECK: sqrdmlsh v0.4s, v1.4s, v2.s[3]
+
+0x20,0xd0,0x72,0x7f
+0x20,0xf0,0x72,0x7f
+0x20,0xd8,0xa2,0x7f
+0x20,0xf8,0xa2,0x7f
+# CHECK: sqrdmlah h0, h1, v2.h[3]
+# CHECK: sqrdmlsh h0, h1, v2.h[3]
+# CHECK: sqrdmlah s0, s1, v2.s[3]
+# CHECK: sqrdmlsh s0, s1, v2.s[3]