diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -570,10 +570,20 @@
   // AArch64 lacks both left-rotate and popcount instructions.
   setOperationAction(ISD::ROTL, MVT::i32, Expand);
   setOperationAction(ISD::ROTL, MVT::i64, Expand);
+
+  // Vector rotates of both directions are lowered via ROTR: ROTL is
+  // expanded into ROTR, and ROTR is custom-lowered to SHL plus SRI.
   for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
     setOperationAction(ISD::ROTL, VT, Expand);
-    setOperationAction(ISD::ROTR, VT, Expand);
   }
+  setOperationAction(ISD::ROTR, MVT::v8i8, Custom);
+  setOperationAction(ISD::ROTR, MVT::v16i8, Custom);
+  setOperationAction(ISD::ROTR, MVT::v4i16, Custom);
+  setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
+  setOperationAction(ISD::ROTR, MVT::v2i32, Custom);
+  setOperationAction(ISD::ROTR, MVT::v4i32, Custom);
+  setOperationAction(ISD::ROTR, MVT::v1i64, Custom);
+  setOperationAction(ISD::ROTR, MVT::v2i64, Custom);
 
   // AArch64 doesn't have i32 MULH{S|U}.
   setOperationAction(ISD::MULHU, MVT::i32, Expand);
@@ -6106,6 +6116,42 @@
     return Result;
   }
+  case ISD::ROTR: {
+    SDLoc DL(Op);
+    EVT VT = Op.getValueType();
+    assert(VT.isFixedLengthVector() && "Unexpected ROTR of scalable vector");
+
+    // Only constant-splat rotate amounts are handled here; everything else
+    // falls back to the generic funnel-shift expansion.
+    SDValue Op0 = Op.getOperand(0);
+    SDValue Op1 = Op.getOperand(1);
+    APInt Splat;
+    if (!ISD::isConstantSplatVector(Op1.getNode(), Splat))
+      return SDValue();
+
+    APInt ShlAmt;
+    APInt SriAmt;
+    uint64_t LaneWidth = VT.getVectorElementType().getFixedSizeInBits();
+    if (Splat.isNegative()) {
+      // A negative amount is a left rotate by the absolute value.
+      ShlAmt = (-Splat).zextOrTrunc(32);
+      SriAmt = LaneWidth - ShlAmt;
+    } else {
+      ShlAmt = LaneWidth - Splat.zextOrTrunc(32);
+      SriAmt = Splat.zextOrTrunc(32);
+    }
+
+    // Rotates by zero or by at least the lane width cannot be encoded in
+    // the SHL/SRI immediates; let the generic expansion handle them.
+    if (ShlAmt.uge(LaneWidth) || SriAmt.uge(LaneWidth))
+      return SDValue();
+
+    SDValue Shl = DAG.getNode(AArch64ISD::VSHL, DL, VT, Op0,
+                              DAG.getConstant(ShlAmt, DL, MVT::i32));
+    SDValue Sri = DAG.getNode(AArch64ISD::VSRI, DL, VT, Shl, Op0,
+                              DAG.getConstant(SriAmt, DL, MVT::i32));
+    return Sri;
+  }
   }
 }
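
Note on the lowering above: it uses the identity rotr(x, C) = (x << (W - C)) | (x >> C) for lane width W and constant splat amount C, and SRI supplies the OR for free: sri #C shifts its source right by C and inserts the surviving low W - C bits into the destination, leaving the destination's top C bits (which already hold x << (W - C)) untouched. A minimal sketch of the intended mapping; %x and %r are hypothetical names, not taken from the patch's tests (W = 8, C = 6):

  ; rotate each i8 lane right by 6
  %r = tail call <8 x i8> @llvm.fshr.v8i8(<8 x i8> %x, <8 x i8> %x, <8 x i8> <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>)
  ; expected lowering:
  ;   shl v1.8b, v0.8b, #2   ; v1 = x << 2, low two bits of each lane zero
  ;   sri v1.8b, v0.8b, #6   ; insert x >> 6 into those low two bits
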
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1291,10 +1291,6 @@
 def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
           (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
-
-def : Pat<(xor (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))),
-          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
-
 } // HasSHA3
 
 let Predicates = [HasSM4] in {
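
The TableGen pattern removed above matched RAX1 (xor of one operand with the other rotated left by one) against the ushr/shl/orr sequence that vector rotates used to expand to. Once rotates lower to shl+sri, that or-of-shifts form no longer reaches instruction selection, so the pattern could never fire; only the int_aarch64_crypto_xar-based XAR pattern is kept. The IR shape it used to catch is the one in rax1.ll further down:

  %a = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %y, <2 x i64> %y, <2 x i64> <i64 1, i64 1>)
  %b = xor <2 x i64> %x, %a   ; previously selected to: rax1 v0.2d, v0.2d, v1.2d

which is presumably why rax1.ll is marked XFAIL in this revision: until an equivalent pattern is added for the new shl/sri (or add/sri) form, the +sha3 run no longer emits rax1 for this input.
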
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
--- a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
 ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
 
 declare i8 @llvm.fshl.i8(i8, i8, i8)
@@ -83,7 +83,7 @@
 ; CHECK-NEXT:    neg v2.4s, v2.4s
 ; CHECK-NEXT:    ushl v1.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    ushl v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z)
   ret <4 x i32> %f
@@ -94,9 +94,9 @@
 define <4 x i32> @rotl_v4i32_rotl_const_shift(<4 x i32> %x) {
 ; CHECK-LABEL: rotl_v4i32_rotl_const_shift:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ushr v1.4s, v0.4s, #29
-; CHECK-NEXT:    shl v0.4s, v0.4s, #3
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    shl v1.4s, v0.4s, #3
+; CHECK-NEXT:    sri v1.4s, v0.4s, #29
+; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
   ret <4 x i32> %f
@@ -185,8 +185,8 @@
 ; CHECK-LABEL: rotr_v4i32_const_shift:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #29
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #3
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    sri v1.4s, v0.4s, #3
+; CHECK-NEXT:    mov v0.16b, v1.16b
 ; CHECK-NEXT:    ret
   %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
   ret <4 x i32> %f
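
The check updates above show the intended codegen effect: a constant-amount rotate shrinks from three instructions (ushr, shl, orr) to two (shl, sri), and the OR no longer needs its own temporary register. Since SRI both reads and writes its destination, an extra mov v0.16b, v1.16b appears when the result must end up back in the argument register, as in the two *_const_shift functions above; in the srem/urem tests below, where the rotate feeds a comparison, no such copy is needed and the sequence is strictly shorter.
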
diff --git a/llvm/test/CodeGen/AArch64/rax1.ll b/llvm/test/CodeGen/AArch64/rax1.ll
--- a/llvm/test/CodeGen/AArch64/rax1.ll
+++ b/llvm/test/CodeGen/AArch64/rax1.ll
@@ -1,20 +1,20 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 2
 ; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s
 ; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s
+; XFAIL: *
 
 define <2 x i64> @rax1(<2 x i64> %x, <2 x i64> %y) {
+; NOSHA3-LABEL: rax1:
+; NOSHA3:       // %bb.0:
+; NOSHA3-NEXT:    add v2.2d, v1.2d, v1.2d
+; NOSHA3-NEXT:    sri v2.2d, v1.2d, #63
+; NOSHA3-NEXT:    eor v0.16b, v0.16b, v2.16b
+; NOSHA3-NEXT:    ret
+;
 ; SHA3-LABEL: rax1:
 ; SHA3:       // %bb.0:
 ; SHA3-NEXT:    rax1 v0.2d, v0.2d, v1.2d
 ; SHA3-NEXT:    ret
-;
-; NOSHA3-LABEL: rax1:
-; NOSHA3:       // %bb.0:
-; NOSHA3-NEXT:    ushr v2.2d, v1.2d, #63
-; NOSHA3-NEXT:    add v1.2d, v1.2d, v1.2d
-; NOSHA3-NEXT:    orr v1.16b, v1.16b, v2.16b
-; NOSHA3-NEXT:    eor v0.16b, v0.16b, v1.16b
-; NOSHA3-NEXT:    ret
   %a = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %y, <2 x i64> %y, <2 x i64> <i64 1, i64 1>)
   %b = xor <2 x i64> %x, %a
   ret <2 x i64> %b
diff --git a/llvm/test/CodeGen/AArch64/rotate.ll b/llvm/test/CodeGen/AArch64/rotate.ll
--- a/llvm/test/CodeGen/AArch64/rotate.ll
+++ b/llvm/test/CodeGen/AArch64/rotate.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
 ; RUN: llc < %s -mtriple=aarch64--linux-gnueabihf | FileCheck %s
 
 ;; This used to cause a backend crash about not being able to
@@ -6,10 +6,9 @@
 define <2 x i64> @testcase(ptr %in) {
 ; CHECK-LABEL: testcase:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    ushr v1.2d, v0.2d, #8
-; CHECK-NEXT:    shl v0.2d, v0.2d, #56
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    ldr q1, [x0]
+; CHECK-NEXT:    shl v0.2d, v1.2d, #56
+; CHECK-NEXT:    sri v0.2d, v1.2d, #8
 ; CHECK-NEXT:    ret
   %1 = load <2 x i64>, ptr %in
   %2 = lshr <2 x i64> %1, <i64 8, i64 8>
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
--- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 
 ; Odd+Even divisors
@@ -35,8 +35,8 @@
 define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_odd_allones_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #52429
-; CHECK-NEXT:    mov w9, #39321
+; CHECK-NEXT:    mov w8, #52429 // =0xcccd
+; CHECK-NEXT:    mov w9, #39321 // =0x9999
 ; CHECK-NEXT:    movk w8, #52428, lsl #16
 ; CHECK-NEXT:    movk w9, #6553, lsl #16
 ; CHECK-NEXT:    dup v1.4s, w8
@@ -56,8 +56,8 @@
 define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_odd_allones_ne:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #52429
-; CHECK-NEXT:    mov w9, #39321
+; CHECK-NEXT:    mov w8, #52429 // =0xcccd
+; CHECK-NEXT:    mov w9, #39321 // =0x9999
 ; CHECK-NEXT:    movk w8, #52428, lsl #16
 ; CHECK-NEXT:    movk w9, #6553, lsl #16
 ; CHECK-NEXT:    dup v1.4s, w8
@@ -79,21 +79,20 @@
 define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_even_allones_eq:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #28087
-; CHECK-NEXT:    mov w9, #9362
+; CHECK-NEXT:    mov w8, #28087 // =0x6db7
+; CHECK-NEXT:    mov w9, #9362 // =0x2492
 ; CHECK-NEXT:    movk w8, #46811, lsl #16
 ; CHECK-NEXT:    movk w9, #4681, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
 ; CHECK-NEXT:    adrp x8, .LCPI3_0
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #31
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    sri v0.4s, v2.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X,
   %cmp = icmp eq <4 x i32> %srem,
@@ -103,21 +102,20 @@
 define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_even_allones_ne:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #28087
-; CHECK-NEXT:    mov w9, #9362
+; CHECK-NEXT:    mov w8, #28087 // =0x6db7
+; CHECK-NEXT:    mov w9, #9362 // =0x2492
 ; CHECK-NEXT:    movk w8, #46811, lsl #16
 ; CHECK-NEXT:    movk w9, #4681, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
 ; CHECK-NEXT:    adrp x8, .LCPI4_0
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #31
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhi v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    sri v0.4s, v2.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhi v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X,
   %cmp = icmp ne <4 x i32> %srem,
@@ -271,8 +269,8 @@
 define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_odd_one:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #52429
-; CHECK-NEXT:    mov w9, #39321
+; CHECK-NEXT:    mov w8, #52429 // =0xcccd
+; CHECK-NEXT:    mov w9, #39321 // =0x9999
 ; CHECK-NEXT:    movk w8, #52428, lsl #16
 ; CHECK-NEXT:    movk w9, #6553, lsl #16
 ; CHECK-NEXT:    dup v1.4s, w8
@@ -294,21 +292,20 @@
 define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_even_one:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #28087
-; CHECK-NEXT:    mov w9, #9362
+; CHECK-NEXT:    mov w8, #28087 // =0x6db7
+; CHECK-NEXT:    mov w9, #9362 // =0x2492
 ; CHECK-NEXT:    movk w8, #46811, lsl #16
 ; CHECK-NEXT:    movk w9, #4681, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
 ; CHECK-NEXT:    adrp x8, .LCPI11_0
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI11_0]
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #31
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    sri v0.4s, v2.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X,
   %cmp = icmp eq <4 x i32> %srem,
@@ -525,8 +522,8 @@
 define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_odd_allones_and_one:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #52429
-; CHECK-NEXT:    mov w9, #39321
+; CHECK-NEXT:    mov w8, #52429 // =0xcccd
+; CHECK-NEXT:    mov w9, #39321 // =0x9999
 ; CHECK-NEXT:    movk w8, #52428, lsl #16
 ; CHECK-NEXT:    movk w9, #6553, lsl #16
 ; CHECK-NEXT:    dup v1.4s, w8
@@ -548,21 +545,20 @@
 define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_even_allones_and_one:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #28087
-; CHECK-NEXT:    mov w9, #9362
+; CHECK-NEXT:    mov w8, #28087 // =0x6db7
+; CHECK-NEXT:    mov w9, #9362 // =0x2492
 ; CHECK-NEXT:    movk w8, #46811, lsl #16
 ; CHECK-NEXT:    movk w9, #4681, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
 ; CHECK-NEXT:    adrp x8, .LCPI20_0
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI20_0]
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #31
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #1
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI20_0]
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    sri v0.4s, v2.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X,
   %cmp = icmp eq <4 x i32> %srem,
diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
--- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll
@@ -1,17 +1,17 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 
 ; Odd divisor
 define <4 x i32> @test_srem_odd_25(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_odd_25:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23593
-; CHECK-NEXT:    mov w9, #47185
+; CHECK-NEXT:    mov w8, #23593 // =0x5c29
+; CHECK-NEXT:    mov w9, #47185 // =0xb851
 ; CHECK-NEXT:    movk w8, #49807, lsl #16
 ; CHECK-NEXT:    movk w9, #1310, lsl #16
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
-; CHECK-NEXT:    mov w8, #28834
+; CHECK-NEXT:    mov w8, #28834 // =0x70a2
 ; CHECK-NEXT:    movk w8, #2621, lsl #16
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    movi v1.4s, #1
@@ -29,22 +29,21 @@
 define <4 x i32> @test_srem_even_100(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_even_100:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23593
-; CHECK-NEXT:    mov w9, #47184
+; CHECK-NEXT:    mov w8, #23593 // =0x5c29
+; CHECK-NEXT:    mov w9, #47184 // =0xb850
 ; CHECK-NEXT:    movk w8, #49807, lsl #16
 ; CHECK-NEXT:    movk w9, #1310, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
-; CHECK-NEXT:    mov w8, #23592
+; CHECK-NEXT:    mov w8, #23592 // =0x5c28
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    movk w8, #655, lsl #16
+; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #30
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #2
-; CHECK-NEXT:    dup v2.4s, w8
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    sri v0.4s, v2.4s, #2
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X,
   %cmp = icmp eq <4 x i32> %srem,
@@ -58,13 +57,13 @@
 define <4 x i32> @test_srem_odd_neg25(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_odd_neg25:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23593
-; CHECK-NEXT:    mov w9, #47185
+; CHECK-NEXT:    mov w8, #23593 // =0x5c29
+; CHECK-NEXT:    mov w9, #47185 // =0xb851
 ; CHECK-NEXT:    movk w8, #49807, lsl #16
 ; CHECK-NEXT:    movk w9, #1310, lsl #16
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
-; CHECK-NEXT:    mov w8, #28834
+; CHECK-NEXT:    mov w8, #28834 // =0x70a2
 ; CHECK-NEXT:    movk w8, #2621, lsl #16
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    movi v1.4s, #1
@@ -82,22 +81,21 @@
 define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_even_neg100:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23593
-; CHECK-NEXT:    mov w9, #47184
+; CHECK-NEXT:    mov w8, #23593 // =0x5c29
+; CHECK-NEXT:    mov w9, #47184 // =0xb850
 ; CHECK-NEXT:    movk w8, #49807, lsl #16
 ; CHECK-NEXT:    movk w9, #1310, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    dup v2.4s, w9
-; CHECK-NEXT:    mov w8, #23592
+; CHECK-NEXT:    mov w8, #23592 // =0x5c28
 ; CHECK-NEXT:    mla v2.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    movk w8, #655, lsl #16
+; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    shl v0.4s, v2.4s, #30
-; CHECK-NEXT:    ushr v1.4s, v2.4s, #2
-; CHECK-NEXT:    dup v2.4s, w8
-; CHECK-NEXT:    orr v0.16b, v1.16b, v0.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    sri v0.4s, v2.4s, #2
+; CHECK-NEXT:    movi v2.4s, #1
+; CHECK-NEXT:    cmhs v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %srem = srem <4 x i32> %X,
   %cmp = icmp eq <4 x i32> %srem,
@@ -112,7 +110,7 @@
 define <4 x i32> @test_srem_odd_undef1(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_odd_undef1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #34079
+; CHECK-NEXT:    mov w8, #34079 // =0x851f
 ; CHECK-NEXT:    movk w8, #20971, lsl #16
 ; CHECK-NEXT:    movi v3.4s, #25
 ; CHECK-NEXT:    dup v1.4s, w8
@@ -135,7 +133,7 @@
 define <4 x i32> @test_srem_even_undef1(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_srem_even_undef1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #34079
+; CHECK-NEXT:    mov w8, #34079 // =0x851f
 ; CHECK-NEXT:    movk w8, #20971, lsl #16
 ; CHECK-NEXT:    movi v3.4s, #100
 ; CHECK-NEXT:    dup v1.4s, w8
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
--- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 
 ; Odd+Even divisors
@@ -251,7 +251,7 @@
 define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_urem_odd_one:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #52429
+; CHECK-NEXT:    mov w8, #52429 // =0xcccd
 ; CHECK-NEXT:    movk w8, #52428, lsl #16
 ; CHECK-NEXT:    movi v2.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
@@ -271,18 +271,17 @@
 define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_urem_even_one:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #28087
+; CHECK-NEXT:    mov w8, #28087 // =0x6db7
 ; CHECK-NEXT:    movk w8, #46811, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    adrp x8, .LCPI11_0
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI11_0]
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #31
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #1
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    sri v1.4s, v0.4s, #1
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI11_0]
+; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X,
   %cmp = icmp eq <4 x i32> %urem,
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
--- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 
 define <4 x i1> @t32_3(<4 x i32> %X) nounwind {
@@ -6,7 +6,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI0_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI0_0]
-; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    mov w8, #43691 // =0xaaab
 ; CHECK-NEXT:    movk w8, #43690, lsl #16
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    dup v1.4s, w8
@@ -26,11 +26,11 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI1_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI1_0]
-; CHECK-NEXT:    mov w8, #52429
+; CHECK-NEXT:    mov w8, #52429 // =0xcccd
 ; CHECK-NEXT:    movk w8, #52428, lsl #16
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    dup v1.4s, w8
-; CHECK-NEXT:    mov w8, #13106
+; CHECK-NEXT:    mov w8, #13106 // =0x3332
 ; CHECK-NEXT:    movk w8, #13107, lsl #16
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    dup v1.4s, w8
@@ -47,18 +47,17 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI2_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI2_0]
-; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    mov w8, #43691 // =0xaaab
 ; CHECK-NEXT:    movk w8, #43690, lsl #16
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    dup v1.4s, w8
-; CHECK-NEXT:    mov w8, #43690
+; CHECK-NEXT:    mov w8, #43690 // =0xaaaa
 ; CHECK-NEXT:    movk w8, #10922, lsl #16
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    dup v2.4s, w8
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #31
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #1
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    sri v1.4s, v0.4s, #1
+; CHECK-NEXT:    dup v0.4s, w8
+; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X,
@@ -70,18 +69,17 @@
 ; CHECK-LABEL: t32_6_part1:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI3_0
-; CHECK-NEXT:    mov w9, #43691
-; CHECK-NEXT:    movk w9, #43690, lsl #16
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
-; CHECK-NEXT:    adrp x8, .LCPI3_1
-; CHECK-NEXT:    dup v2.4s, w9
+; CHECK-NEXT:    mov w8, #43691 // =0xaaab
+; CHECK-NEXT:    movk w8, #43690, lsl #16
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    mul v0.4s, v0.4s, v2.4s
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT:    dup v1.4s, w8
+; CHECK-NEXT:    adrp x8, .LCPI3_1
+; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #31
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #1
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
+; CHECK-NEXT:    sri v1.4s, v0.4s, #1
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    xtn v0.4h, v0.4s
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X,
@@ -94,7 +92,7 @@
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI4_0
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
-; CHECK-NEXT:    mov w8, #43691
+; CHECK-NEXT:    mov w8, #43691 // =0xaaab
 ; CHECK-NEXT:    movk w8, #43690, lsl #16
 ; CHECK-NEXT:    sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    dup v1.4s, w8
diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
--- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
+++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll
@@ -1,15 +1,15 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub
 ; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
 
 ; Odd divisor
 define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_urem_odd_25:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23593
+; CHECK-NEXT:    mov w8, #23593 // =0x5c29
 ; CHECK-NEXT:    movk w8, #49807, lsl #16
 ; CHECK-NEXT:    movi v2.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
-; CHECK-NEXT:    mov w8, #28835
+; CHECK-NEXT:    mov w8, #28835 // =0x70a3
 ; CHECK-NEXT:    movk w8, #2621, lsl #16
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT:    dup v1.4s, w8
@@ -26,19 +26,18 @@
 define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_urem_even_100:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #23593
+; CHECK-NEXT:    mov w8, #23593 // =0x5c29
 ; CHECK-NEXT:    movk w8, #49807, lsl #16
-; CHECK-NEXT:    movi v3.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
 ; CHECK-NEXT:    dup v1.4s, w8
-; CHECK-NEXT:    mov w8, #23592
+; CHECK-NEXT:    mov w8, #23592 // =0x5c28
 ; CHECK-NEXT:    movk w8, #655, lsl #16
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    dup v2.4s, w8
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #30
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #2
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    sri v1.4s, v0.4s, #2
+; CHECK-NEXT:    dup v0.4s, w8
+; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X,
   %cmp = icmp eq <4 x i32> %urem,
@@ -72,16 +71,15 @@
 ; CHECK-LABEL: test_urem_even_neg100:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    adrp x8, .LCPI3_0
-; CHECK-NEXT:    movi v3.4s, #1
+; CHECK-NEXT:    movi v2.4s, #1
 ; CHECK-NEXT:    ldr q1, [x8, :lo12:.LCPI3_0]
 ; CHECK-NEXT:    adrp x8, .LCPI3_1
 ; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI3_1]
 ; CHECK-NEXT:    shl v1.4s, v0.4s, #30
-; CHECK-NEXT:    ushr v0.4s, v0.4s, #2
-; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    cmhs v0.4s, v2.4s, v0.4s
-; CHECK-NEXT:    and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT:    sri v1.4s, v0.4s, #2
+; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI3_1]
+; CHECK-NEXT:    cmhs v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT:    ret
   %urem = urem <4 x i32> %X,
   %cmp = icmp eq <4 x i32> %urem,
@@ -96,7 +94,7 @@
 define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_urem_odd_undef1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #34079
+; CHECK-NEXT:    mov w8, #34079 // =0x851f
 ; CHECK-NEXT:    movk w8, #20971, lsl #16
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    umull2 v2.2d, v0.4s, v1.4s
@@ -118,7 +116,7 @@
 define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind {
 ; CHECK-LABEL: test_urem_even_undef1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #34079
+; CHECK-NEXT:    mov w8, #34079 // =0x851f
 ; CHECK-NEXT:    movk w8, #20971, lsl #16
 ; CHECK-NEXT:    dup v1.4s, w8
 ; CHECK-NEXT:    umull2 v2.2d, v0.4s, v1.4s
diff --git a/llvm/test/CodeGen/AArch64/vector-rotate.ll b/llvm/test/CodeGen/AArch64/vector-rotate.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/vector-rotate.ll
@@ -0,0 +1,207 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --extra_scrub --version 2
+; RUN: llc < %s | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @rotl_v8i8(<8 x i8> %0, ptr %1) {
+; CHECK-LABEL: rotl_v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.8b, v0.8b, #2
+; CHECK-NEXT:    sri v1.8b, v0.8b, #6
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <8 x i8> @llvm.fshl.v8i8(<8 x i8> %0, <8 x i8> %0, <8 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>)
+  store <8 x i8> %3, ptr %1, align 8
+  ret void
+}
+
+define void @rotr_v8i8(<8 x i8> %0, ptr %1) {
+; CHECK-LABEL: rotr_v8i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.8b, v0.8b, #6
+; CHECK-NEXT:    sri v1.8b, v0.8b, #2
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <8 x i8> @llvm.fshl.v8i8(<8 x i8> %0, <8 x i8> %0, <8 x i8> <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>)
+  store <8 x i8> %3, ptr %1, align 8
+  ret void
+}
+
+define void @rotl_v16i8(<16 x i8> %0, ptr %1) {
+; CHECK-LABEL: rotl_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.16b, v0.16b, #2
+; CHECK-NEXT:    sri v1.16b, v0.16b, #6
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %0, <16 x i8> %0, <16 x i8> <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>)
+  store <16 x i8> %3, ptr %1, align 16
+  ret void
+}
+
+define void @rotr_v16i8(<16 x i8> %0, ptr %1) {
+; CHECK-LABEL: rotr_v16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.16b, v0.16b, #6
+; CHECK-NEXT:    sri v1.16b, v0.16b, #2
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %0, <16 x i8> %0, <16 x i8> <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>)
+  store <16 x i8> %3, ptr %1, align 16
+  ret void
+}
+
+define void @rotl_v4i16(<4 x i16> %0, ptr %1) {
+; CHECK-LABEL: rotl_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.4h, v0.4h, #2
+; CHECK-NEXT:    sri v1.4h, v0.4h, #14
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %0, <4 x i16> %0, <4 x i16> <i16 2, i16 2, i16 2, i16 2>)
+  store <4 x i16> %3, ptr %1, align 8
+  ret void
+}
+
+define void @rotr_v4i16(<4 x i16> %0, ptr %1) {
+; CHECK-LABEL: rotr_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.4h, v0.4h, #14
+; CHECK-NEXT:    sri v1.4h, v0.4h, #2
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %0, <4 x i16> %0, <4 x i16> <i16 14, i16 14, i16 14, i16 14>)
+  store <4 x i16> %3, ptr %1, align 8
+  ret void
+}
+
+define void @rotl_v8i16(<8 x i16> %0, ptr %1) {
+; CHECK-LABEL: rotl_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.8h, v0.8h, #2
+; CHECK-NEXT:    sri v1.8h, v0.8h, #14
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %0, <8 x i16> <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>)
+  store <8 x i16> %3, ptr %1, align 16
+  ret void
+}
+
+define void @rotr_v8i16(<8 x i16> %0, ptr %1) {
+; CHECK-LABEL: rotr_v8i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.8h, v0.8h, #14
+; CHECK-NEXT:    sri v1.8h, v0.8h, #2
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %0, <8 x i16> %0, <8 x i16> <i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14, i16 14>)
+  store <8 x i16> %3, ptr %1, align 16
+  ret void
+}
+
+define void @rotl_v2i32(<2 x i32> %0, ptr %1) {
+; CHECK-LABEL: rotl_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.2s, v0.2s, #2
+; CHECK-NEXT:    sri v1.2s, v0.2s, #30
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %0, <2 x i32> %0, <2 x i32> <i32 2, i32 2>)
+  store <2 x i32> %3, ptr %1, align 16
+  ret void
+}
+
+define void @rotr_v2i32(<2 x i32> %0, ptr %1) {
+; CHECK-LABEL: rotr_v2i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.2s, v0.2s, #30
+; CHECK-NEXT:    sri v1.2s, v0.2s, #2
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %0, <2 x i32> %0, <2 x i32> <i32 30, i32 30>)
+  store <2 x i32> %3, ptr %1, align 16
+  ret void
+}
+
+define void @rotl_v4i32(<4 x i32> %0, ptr %1) {
+; CHECK-LABEL: rotl_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.4s, v0.4s, #2
+; CHECK-NEXT:    sri v1.4s, v0.4s, #30
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
+  store <4 x i32> %3, ptr %1, align 16
+  ret void
+}
+
+define void @rotr_v4i32(<4 x i32> %0, ptr %1) {
+; CHECK-LABEL: rotr_v4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.4s, v0.4s, #30
+; CHECK-NEXT:    sri v1.4s, v0.4s, #2
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %0, <4 x i32> %0, <4 x i32> <i32 30, i32 30, i32 30, i32 30>)
+  store <4 x i32> %3, ptr %1, align 16
+  ret void
+}
+
+define void @rotl_v1i64(<1 x i64> %0, ptr %1) {
+; CHECK-LABEL: rotl_v1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl d1, d0, #2
+; CHECK-NEXT:    sri d1, d0, #62
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %0, <1 x i64> %0, <1 x i64> <i64 2>)
+  store <1 x i64> %3, ptr %1, align 16
+  ret void
+}
+
+define void @rotr_v1i64(<1 x i64> %0, ptr %1) {
+; CHECK-LABEL: rotr_v1i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl d1, d0, #62
+; CHECK-NEXT:    sri d1, d0, #2
+; CHECK-NEXT:    str d1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <1 x i64> @llvm.fshl.v1i64(<1 x i64> %0, <1 x i64> %0, <1 x i64> <i64 62>)
+  store <1 x i64> %3, ptr %1, align 16
+  ret void
+}
+
+define void @rotl_v2i64(<2 x i64> %0, ptr %1) {
+; CHECK-LABEL: rotl_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.2d, v0.2d, #2
+; CHECK-NEXT:    sri v1.2d, v0.2d, #62
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %0, <2 x i64> %0, <2 x i64> <i64 2, i64 2>)
+  store <2 x i64> %3, ptr %1, align 16
+  ret void
+}
+
+define void @rotr_v2i64(<2 x i64> %0, ptr %1) {
+; CHECK-LABEL: rotr_v2i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v1.2d, v0.2d, #62
+; CHECK-NEXT:    sri v1.2d, v0.2d, #2
+; CHECK-NEXT:    str q1, [x0]
+; CHECK-NEXT:    ret
+  %3 = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %0, <2 x i64> %0, <2 x i64> <i64 62, i64 62>)
+  store <2 x i64> %3, ptr %1, align 16
+  ret void
+}
+
+declare <8 x i8> @llvm.fshl.v8i8(<8 x i8>, <8 x i8>, <8 x i8>) #0
+declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) #0
+declare <4 x i16> @llvm.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0
+declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0
+declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0
+declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0
+declare <1 x i64> @llvm.fshl.v1i64(<1 x i64>, <1 x i64>, <1 x i64>) #0
+declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
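
Note that the rotr_* functions above are phrased as llvm.fshl calls with an amount of W - 2 rather than as llvm.fshr calls, so both rotate directions are funneled through the same ROTL-expand-to-ROTR path. A direct fshr form should lower identically under this patch; a hypothetical example, not part of the test file:

  %r = tail call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> <i32 2, i32 2, i32 2, i32 2>)
  ; expected:
  ;   shl v1.4s, v0.4s, #30
  ;   sri v1.4s, v0.4s, #2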