Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -85,6 +85,7 @@
   // Do some preprocessing of G_PHIs before we begin selection.
   void processPHIs(MachineFunction &MF);
 
+  bool earlySelectAND(MachineInstr &MI, MachineRegisterInfo &MRI) const;
   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
 
   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
@@ -1666,6 +1667,76 @@
   return true;
 }
 
+bool AArch64InstructionSelector::earlySelectAND(
+    MachineInstr &MI, MachineRegisterInfo &MRI) const {
+  assert(MI.getOpcode() == TargetOpcode::G_AND);
+  // Look for the following:
+  // %low_bit_mask_cst = G_CONSTANT iN low_bit_mask
+  // %immr_cst = G_CONSTANT iN immr
+  // %lshr = G_LSHR %something, %immr_cst
+  // %dst = G_AND %lshr, %low_bit_mask_cst
+  //
+  // And produce
+  //
+  // %dst = UBFM %something, immr, imms
+  //
+  // Where imms = immr + trailing_ones(low_bit_mask) - 1
+  //
+  // When both immr and imms are in the range [0, size of dst in bits)
+  //
+  // TODO: Handle other cases from isBitfieldExtractOpFromAnd in
+  // AArch64ISelDAGToDAG.
+  Register Dst = MI.getOperand(0).getReg();
+  auto DstTy = MRI.getType(Dst);
+  if (DstTy.isVector())
+    return false;
+
+  // UBFM only supports 32-bit and 64-bit registers.
+  unsigned DstSize = DstTy.getSizeInBits();
+  if (DstSize != 32 && DstSize != 64)
+    return false;
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+
+  // Look for a mask on the G_AND's RHS. The immediate must be a non-zero
+  // mask of the low bits, i.e. imm != 0 and imm & (imm + 1) == 0.
+  auto MaybeLowBitMask = getConstantVRegValWithLookThrough(RHS, MRI);
+  if (!MaybeLowBitMask)
+    return false;
+  uint64_t LowBitMask = MaybeLowBitMask->Value;
+  if (!LowBitMask || (LowBitMask & (LowBitMask + 1)))
+    return false;
+
+  // Look for %lshr = G_LSHR %something, %constant.
+  MachineInstr *Lshr = getOpcodeDef(TargetOpcode::G_LSHR, LHS, MRI);
+  if (!Lshr)
+    return false;
+
+  auto MaybeLshrImm =
+      getConstantVRegValWithLookThrough(Lshr->getOperand(2).getReg(), MRI);
+  if (!MaybeLshrImm)
+    return false;
+
+  // Check that the immediates we want to pass to the UBFM are legal. Both must
+  // be in the range [0, DstSize).
+  uint64_t ImmR = MaybeLshrImm->Value;
+  if (ImmR >= DstSize)
+    return false;
+  // imms indexes the highest bit of the extracted field:
+  //
+  //   imms = immr + trailing_ones(low_bit_mask) - 1
+  uint64_t ImmS = ImmR + countTrailingOnes(LowBitMask) - 1;
+  if (ImmS >= DstSize)
+    return false;
+
+  MachineIRBuilder MIB(MI);
+  unsigned Opc = DstSize == 64 ?
AArch64::UBFMXri : AArch64::UBFMWri; + auto UBFM = + MIB.buildInstr(Opc, {Dst}, {Lshr->getOperand(1).getReg(), ImmR, ImmS}); + MI.eraseFromParent(); + return constrainSelectedInstRegOperands(*UBFM, TII, TRI, RBI); +} + bool AArch64InstructionSelector::earlySelectSHL( MachineInstr &I, MachineRegisterInfo &MRI) const { // We try to match the immediate variant of LSL, which is actually an alias @@ -1752,6 +1823,8 @@ MachineRegisterInfo &MRI = MF.getRegInfo(); switch (I.getOpcode()) { + case TargetOpcode::G_AND: + return earlySelectAND(I, MRI); case TargetOpcode::G_SHL: return earlySelectSHL(I, MRI); case TargetOpcode::G_CONSTANT: { Index: llvm/test/CodeGen/AArch64/GlobalISel/select-and-to-ubfm.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/AArch64/GlobalISel/select-and-to-ubfm.mir @@ -0,0 +1,300 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple aarch64 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s +# +# Check that we can recognize a G_AND and a G_LSHR which can be combined into +# a UBFM. + +... +--- +name: and_to_ubfm_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + ; CHECK-LABEL: name: and_to_ubfm_s32 + ; CHECK: liveins: $w0 + ; CHECK: %copy:gpr32 = COPY $w0 + ; CHECK: %res:gpr32 = UBFMWri %copy, 22, 22 + ; CHECK: $w0 = COPY %res + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %immr:gpr(s64) = G_CONSTANT i64 22 + %lshr:gpr(s32) = G_LSHR %copy, %immr(s64) + %mask:gpr(s32) = G_CONSTANT i32 1 + %res:gpr(s32) = G_AND %lshr, %mask + $w0 = COPY %res(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: and_to_ubfm_s64 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0 + + ; CHECK-LABEL: name: and_to_ubfm_s64 + ; CHECK: liveins: $x0 + ; CHECK: %copy:gpr64 = COPY $x0 + ; CHECK: %res:gpr64 = UBFMXri %copy, 22, 22 + ; CHECK: $x0 = COPY %res + ; CHECK: RET_ReallyLR implicit $x0 + %copy:gpr(s64) = COPY $x0 + %immr:gpr(s64) = G_CONSTANT i64 22 + %lshr:gpr(s64) = G_LSHR %copy, %immr(s64) + %mask:gpr(s64) = G_CONSTANT i64 1 + %res:gpr(s64) = G_AND %lshr, %mask + $x0 = COPY %res(s64) + RET_ReallyLR implicit $x0 + +... +--- +name: too_large_immr_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + ; We can't combine here because both of the immediates passed to UBFM must + ; be smaller than the size of the register. + ; + ; In this case, immr is too large. + ; + ; CHECK-LABEL: name: too_large_immr_s32 + ; CHECK: liveins: $w0 + ; CHECK: %copy:gpr32 = COPY $w0 + ; CHECK: %immr:gpr32 = MOVi32imm 40 + ; CHECK: %lshr:gpr32 = LSRVWr %copy, %immr + ; CHECK: %res:gpr32sp = ANDWri %lshr, 0 + ; CHECK: $w0 = COPY %res + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %immr:gpr(s32) = G_CONSTANT i32 40 + %lshr:gpr(s32) = G_LSHR %copy, %immr(s32) + %mask:gpr(s32) = G_CONSTANT i32 1 + %res:gpr(s32) = G_AND %lshr, %mask + $w0 = COPY %res(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: too_large_imms_s32 +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + ; We can't combine here because both of the immediates passed to UBFM must + ; be smaller than the size of the register. + ; + ; Trailing ones of the mask: 2 + ; immr = 31 + ; + ; -> imms = 31 + 2 - 1 = 32, which is too large. 
+    ;
+    ; CHECK-LABEL: name: too_large_imms_s32
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %lshr:gpr32 = UBFMWri %copy, 31, 31
+    ; CHECK: %res:gpr32sp = ANDWri %lshr, 1
+    ; CHECK: $w0 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w0
+    %immr:gpr(s64) = G_CONSTANT i64 31
+    %lshr:gpr(s32) = G_LSHR %copy, %immr(s64)
+    %mask:gpr(s32) = G_CONSTANT i32 3
+    %res:gpr(s32) = G_AND %lshr, %mask
+    $w0 = COPY %res(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: too_large_immr_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: too_large_immr_s64
+    ; CHECK: liveins: $x0
+    ; CHECK: %copy:gpr64 = COPY $x0
+    ; CHECK: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 64
+    ; CHECK: %immr:gpr64 = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+    ; CHECK: %lshr:gpr64 = LSRVXr %copy, %immr
+    ; CHECK: %res:gpr64sp = ANDXri %lshr, 4096
+    ; CHECK: $x0 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:gpr(s64) = COPY $x0
+    %immr:gpr(s64) = G_CONSTANT i64 64
+    %lshr:gpr(s64) = G_LSHR %copy, %immr(s64)
+    %mask:gpr(s64) = G_CONSTANT i64 1
+    %res:gpr(s64) = G_AND %lshr, %mask
+    $x0 = COPY %res(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: too_large_imms_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x0
+    ; We can't combine here because both of the immediates passed to UBFM must
+    ; be smaller than the size of the register.
+    ;
+    ; Trailing ones of the mask: 3
+    ; immr = 62
+    ;
+    ; -> imms = 62 + 3 - 1 = 64, which is too large.
+    ;
+    ; CHECK-LABEL: name: too_large_imms_s64
+    ; CHECK: liveins: $x0
+    ; CHECK: %copy:gpr64 = COPY $x0
+    ; CHECK: %lshr:gpr64 = UBFMXri %copy, 62, 63
+    ; CHECK: %res:gpr64sp = ANDXri %lshr, 4098
+    ; CHECK: $x0 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $x0
+    %copy:gpr(s64) = COPY $x0
+    %immr:gpr(s64) = G_CONSTANT i64 62
+    %lshr:gpr(s64) = G_LSHR %copy, %immr(s64)
+    %mask:gpr(s64) = G_CONSTANT i64 7
+    %res:gpr(s64) = G_AND %lshr, %mask
+    $x0 = COPY %res(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name: bad_low_bit_mask
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; The mask value has to be a valid low-bit mask.
+    ;
+    ; That is, mask & (mask + 1) == 0.
+    ;
+    ; mask = 2
+    ; 2 & (2 + 1) != 0
+    ;
+    ; CHECK-LABEL: name: bad_low_bit_mask
+    ; CHECK: liveins: $w0
+    ; CHECK: %copy:gpr32 = COPY $w0
+    ; CHECK: %lshr:gpr32 = UBFMWri %copy, 20, 31
+    ; CHECK: %res:gpr32sp = ANDWri %lshr, 1984
+    ; CHECK: $w0 = COPY %res
+    ; CHECK: RET_ReallyLR implicit $w0
+    %copy:gpr(s32) = COPY $w0
+    %immr:gpr(s64) = G_CONSTANT i64 20
+    %lshr:gpr(s32) = G_LSHR %copy, %immr(s64)
+    %mask:gpr(s32) = G_CONSTANT i32 2
+    %res:gpr(s32) = G_AND %lshr, %mask
+    $w0 = COPY %res(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_fold_negative_immr
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $w0
+    ; Both immediates must be in [0, size of register).
+    ;
+    ; immr = -10, which is not allowed for a ubfm.
+ ; + ; CHECK-LABEL: name: dont_fold_negative_immr + ; CHECK: liveins: $w0 + ; CHECK: %copy:gpr32 = COPY $w0 + ; CHECK: %immr:gpr32 = MOVi32imm -10 + ; CHECK: %lshr:gpr32 = LSRVWr %copy, %immr + ; CHECK: %res:gpr32sp = ANDWri %lshr, 0 + ; CHECK: $w0 = COPY %res + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %immr:gpr(s32) = G_CONSTANT i32 -10 + %lshr:gpr(s32) = G_LSHR %copy, %immr(s32) + %mask:gpr(s32) = G_CONSTANT i32 1 + %res:gpr(s32) = G_AND %lshr, %mask + $w0 = COPY %res(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: dont_fold_negative_imms +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + ; Both immediates must be in [0, size of register). + ; + ; imms = 0 + trailing ones(0) - 1 = -1, which is not allowed for a ubfm. + ; + ; CHECK-LABEL: name: dont_fold_negative_imms + ; CHECK: liveins: $w0 + ; CHECK: %copy:gpr32 = COPY $w0 + ; CHECK: %mask:gpr32 = COPY $wzr + ; CHECK: %res:gpr32 = ANDWrs %mask, %copy, 64 + ; CHECK: $w0 = COPY %res + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %immr:gpr(s32) = G_CONSTANT i32 0 + %lshr:gpr(s32) = G_LSHR %copy, %immr(s32) + %mask:gpr(s32) = G_CONSTANT i32 0 + %res:gpr(s32) = G_AND %lshr, %mask + $w0 = COPY %res(s32) + RET_ReallyLR implicit $w0 + +... +--- +name: zero +alignment: 4 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + liveins: $w0 + ; 0 is a valid value for immr and imms. + ; immr = 0 + ; imms = 0 + trailing ones(1) - 1 = 0 + ; + ; CHECK-LABEL: name: zero + ; CHECK: liveins: $w0 + ; CHECK: %copy:gpr32 = COPY $w0 + ; CHECK: %res:gpr32 = UBFMWri %copy, 0, 0 + ; CHECK: $w0 = COPY %res + ; CHECK: RET_ReallyLR implicit $w0 + %copy:gpr(s32) = COPY $w0 + %immr:gpr(s32) = G_CONSTANT i32 0 + %lshr:gpr(s32) = G_LSHR %copy, %immr(s32) + %mask:gpr(s32) = G_CONSTANT i32 1 + %res:gpr(s32) = G_AND %lshr, %mask + $w0 = COPY %res(s32) + RET_ReallyLR implicit $w0 + +... 
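A note on the arithmetic the negative tests above exercise: the legality check boils down to computing imms = immr + trailing_ones(mask) - 1 and requiring both immediates to lie in [0, register size). The standalone C++20 sketch below is illustrative only; computeUbfmImms and the main driver are made-up names, not part of this patch or of LLVM. It simply mirrors the checks in earlySelectAND and reproduces the values quoted in the test comments, assuming std::countr_one from <bit>.

// Standalone illustration (not LLVM code) of the UBFM immediate checks.
// Build with a C++20 compiler, e.g. clang++ -std=c++20 ubfm_imms.cpp
#include <bit>
#include <cstdint>
#include <cstdio>
#include <optional>
#include <utility>

// Returns (immr, imms) if `(x >> Immr) & Mask` can be selected as a single
// unsigned bitfield extract of a DstSize-bit register, otherwise nullopt.
static std::optional<std::pair<uint64_t, uint64_t>>
computeUbfmImms(uint64_t Mask, uint64_t Immr, unsigned DstSize) {
  // The mask must be a non-zero mask of the low bits.
  if (Mask == 0 || (Mask & (Mask + 1)) != 0)
    return std::nullopt;
  // Both immediates must be in [0, DstSize).
  if (Immr >= DstSize)
    return std::nullopt;
  uint64_t Imms = Immr + std::countr_one(Mask) - 1;
  if (Imms >= DstSize)
    return std::nullopt;
  return std::make_pair(Immr, Imms);
}

int main() {
  // and_to_ubfm_s32: (x >> 22) & 0x1 -> UBFMWri %copy, 22, 22.
  if (auto R = computeUbfmImms(0x1, 22, 32))
    std::printf("s32 fold: immr=%llu imms=%llu\n",
                (unsigned long long)R->first, (unsigned long long)R->second);
  // too_large_imms_s32: imms = 31 + 2 - 1 = 32 is out of range, no fold.
  if (!computeUbfmImms(0x3, 31, 32))
    std::printf("s32: mask=0x3, immr=31 rejected\n");
  // too_large_imms_s64: imms = 62 + 3 - 1 = 64 is out of range, no fold.
  if (!computeUbfmImms(0x7, 62, 64))
    std::printf("s64: mask=0x7, immr=62 rejected\n");
  return 0;
}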
Index: llvm/test/CodeGen/AArch64/arm64-rev.ll =================================================================== --- llvm/test/CodeGen/AArch64/arm64-rev.ll +++ llvm/test/CodeGen/AArch64/arm64-rev.ll @@ -1,34 +1,34 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s -; RUN: llc < %s -global-isel -global-isel-abort=2 -pass-remarks-missed=gisel* -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=FALLBACK,GISEL +; RUN: llc < %s -global-isel -global-isel-abort=2 -mtriple=aarch64-eabi -aarch64-neon-syntax=apple | FileCheck %s --check-prefixes=GISEL -; FALLBACK-NOT: remark{{.*}}test_rev_w +; GISEL-NOT: remark{{.*}}test_rev_w define i32 @test_rev_w(i32 %a) nounwind { ; CHECK-LABEL: test_rev_w: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: rev w0, w0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_w: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: rev w0, w0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_w: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: rev w0, w0 +; GISEL-NEXT: ret entry: %0 = tail call i32 @llvm.bswap.i32(i32 %a) ret i32 %0 } -; FALLBACK-NOT: remark{{.*}}test_rev_x +; GISEL-NOT: remark{{.*}}test_rev_x define i64 @test_rev_x(i64 %a) nounwind { ; CHECK-LABEL: test_rev_x: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: rev x0, x0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_x: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: rev x0, x0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_x: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: rev x0, x0 +; GISEL-NEXT: ret entry: %0 = tail call i64 @llvm.bswap.i64(i64 %a) ret i64 %0 @@ -43,12 +43,12 @@ ; CHECK-NEXT: lsr w0, w8, #16 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_w_srl16: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: and w8, w0, #0xffff -; FALLBACK-NEXT: rev w8, w8 -; FALLBACK-NEXT: lsr w0, w8, #16 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_w_srl16: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w0, #0xffff +; GISEL-NEXT: rev w8, w8 +; GISEL-NEXT: lsr w0, w8, #16 +; GISEL-NEXT: ret entry: %0 = zext i16 %a to i32 %1 = tail call i32 @llvm.bswap.i32(i32 %0) @@ -64,12 +64,12 @@ ; CHECK-NEXT: lsr w0, w8, #16 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_w_srl16_load: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: ldrh w8, [x0] -; FALLBACK-NEXT: rev w8, w8 -; FALLBACK-NEXT: lsr w0, w8, #16 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_w_srl16_load: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: ldrh w8, [x0] +; GISEL-NEXT: rev w8, w8 +; GISEL-NEXT: lsr w0, w8, #16 +; GISEL-NEXT: ret entry: %0 = load i16, i16 *%a %1 = zext i16 %0 to i32 @@ -86,13 +86,13 @@ ; CHECK-NEXT: rev16 w0, w8 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_w_srl16_add: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: and w8, w1, #0xff -; FALLBACK-NEXT: add w8, w8, w0, uxtb -; FALLBACK-NEXT: rev w8, w8 -; FALLBACK-NEXT: lsr w0, w8, #16 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_w_srl16_add: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: and w8, w1, #0xff +; GISEL-NEXT: add w8, w8, w0, uxtb +; GISEL-NEXT: rev w8, w8 +; GISEL-NEXT: lsr w0, w8, #16 +; GISEL-NEXT: ret entry: %0 = zext i8 %a to i32 %1 = zext i8 %b to i32 @@ -112,13 +112,13 @@ ; CHECK-NEXT: lsr x0, x8, #32 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_x_srl32: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: // kill: def $w0 killed $w0 def $x0 -; FALLBACK-NEXT: ubfx x8, x0, #0, #32 -; FALLBACK-NEXT: rev x8, x8 -; FALLBACK-NEXT: lsr x0, x8, 
#32 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_x_srl32: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: // kill: def $w0 killed $w0 def $x0 +; GISEL-NEXT: ubfx x8, x0, #0, #32 +; GISEL-NEXT: rev x8, x8 +; GISEL-NEXT: lsr x0, x8, #32 +; GISEL-NEXT: ret entry: %0 = zext i32 %a to i64 %1 = tail call i64 @llvm.bswap.i64(i64 %0) @@ -134,12 +134,12 @@ ; CHECK-NEXT: lsr x0, x8, #32 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_x_srl32_load: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: ldr w8, [x0] -; FALLBACK-NEXT: rev x8, x8 -; FALLBACK-NEXT: lsr x0, x8, #32 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_x_srl32_load: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: ldr w8, [x0] +; GISEL-NEXT: rev x8, x8 +; GISEL-NEXT: lsr x0, x8, #32 +; GISEL-NEXT: ret entry: %0 = load i32, i32 *%a %1 = zext i32 %0 to i64 @@ -155,13 +155,13 @@ ; CHECK-NEXT: rev32 x0, x8 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev_x_srl32_shift: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: lsl x8, x0, #33 -; FALLBACK-NEXT: lsr x8, x8, #35 -; FALLBACK-NEXT: rev x8, x8 -; FALLBACK-NEXT: lsr x0, x8, #32 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev_x_srl32_shift: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: lsl x8, x0, #33 +; GISEL-NEXT: lsr x8, x8, #35 +; GISEL-NEXT: rev x8, x8 +; GISEL-NEXT: lsr x0, x8, #32 +; GISEL-NEXT: ret entry: %0 = shl i64 %a, 33 %1 = lshr i64 %0, 35 @@ -179,18 +179,18 @@ ; CHECK-NEXT: rev16 w0, w0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev16_w: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: lsr w8, w0, #8 -; FALLBACK-NEXT: lsl w9, w0, #8 -; FALLBACK-NEXT: and w10, w8, #0xff0000 -; FALLBACK-NEXT: and w11, w9, #0xff000000 -; FALLBACK-NEXT: and w9, w9, #0xff00 -; FALLBACK-NEXT: orr w10, w11, w10 -; FALLBACK-NEXT: and w8, w8, #0xff -; FALLBACK-NEXT: orr w9, w10, w9 -; FALLBACK-NEXT: orr w0, w9, w8 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev16_w: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: lsr w8, w0, #8 +; GISEL-NEXT: lsl w9, w0, #8 +; GISEL-NEXT: and w8, w8, #0xff0000 +; GISEL-NEXT: and w11, w9, #0xff000000 +; GISEL-NEXT: and w9, w9, #0xff00 +; GISEL-NEXT: orr w8, w11, w8 +; GISEL-NEXT: ubfx w10, w0, #8, #8 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w0, w8, w10 +; GISEL-NEXT: ret entry: %tmp1 = lshr i32 %X, 8 %X15 = bitcast i32 %X to i32 @@ -215,12 +215,12 @@ ; CHECK-NEXT: ror x0, x8, #16 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev16_x: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: rev x8, x0 -; FALLBACK-NEXT: lsl x9, x8, #48 -; FALLBACK-NEXT: orr x0, x9, x8, lsr #16 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev16_x: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: rev x8, x0 +; GISEL-NEXT: lsl x9, x8, #48 +; GISEL-NEXT: orr x0, x9, x8, lsr #16 +; GISEL-NEXT: ret entry: %0 = tail call i64 @llvm.bswap.i64(i64 %a) %1 = lshr i64 %0, 16 @@ -235,12 +235,12 @@ ; CHECK-NEXT: rev32 x0, x0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_rev32_x: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: rev x8, x0 -; FALLBACK-NEXT: lsl x9, x8, #32 -; FALLBACK-NEXT: orr x0, x9, x8, lsr #32 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_rev32_x: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: rev x8, x0 +; GISEL-NEXT: lsl x9, x8, #32 +; GISEL-NEXT: orr x0, x9, x8, lsr #32 +; GISEL-NEXT: ret entry: %0 = tail call i64 @llvm.bswap.i64(i64 %a) %1 = lshr i64 %0, 32 @@ -256,11 +256,11 @@ ; CHECK-NEXT: rev64.8b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64D8: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev64.8b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: 
test_vrev64D8: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev64.8b v0, v0 +; GISEL-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> ret <8 x i8> %tmp2 @@ -273,11 +273,11 @@ ; CHECK-NEXT: rev64.4h v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64D16: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev64.4h v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64D16: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev64.4h v0, v0 +; GISEL-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> ret <4 x i16> %tmp2 @@ -290,11 +290,11 @@ ; CHECK-NEXT: rev64.2s v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64D32: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev64.2s v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64D32: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev64.2s v0, v0 +; GISEL-NEXT: ret %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <2 x i32> ret <2 x i32> %tmp2 @@ -307,11 +307,11 @@ ; CHECK-NEXT: rev64.2s v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64Df: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev64.2s v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64Df: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev64.2s v0, v0 +; GISEL-NEXT: ret %tmp1 = load <2 x float>, <2 x float>* %A %tmp2 = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> ret <2 x float> %tmp2 @@ -324,11 +324,11 @@ ; CHECK-NEXT: rev64.16b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64Q8: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr q0, [x0] -; FALLBACK-NEXT: rev64.16b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64Q8: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: rev64.16b v0, v0 +; GISEL-NEXT: ret %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> ret <16 x i8> %tmp2 @@ -341,11 +341,11 @@ ; CHECK-NEXT: rev64.8h v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64Q16: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr q0, [x0] -; FALLBACK-NEXT: rev64.8h v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64Q16: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: rev64.8h v0, v0 +; GISEL-NEXT: ret %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> ret <8 x i16> %tmp2 @@ -358,11 +358,11 @@ ; CHECK-NEXT: rev64.4s v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64Q32: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr q0, [x0] -; FALLBACK-NEXT: rev64.4s v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64Q32: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: rev64.4s v0, v0 +; GISEL-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <4 x i32> ret <4 x i32> %tmp2 @@ -375,11 +375,11 @@ ; CHECK-NEXT: rev64.4s v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64Qf: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr q0, [x0] -; FALLBACK-NEXT: rev64.4s v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64Qf: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: rev64.4s v0, v0 +; GISEL-NEXT: ret %tmp1 = load <4 x float>, <4 x float>* %A %tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> ret <4 x float> %tmp2 @@ 
-392,11 +392,11 @@ ; CHECK-NEXT: rev32.8b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev32D8: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev32.8b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev32D8: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev32.8b v0, v0 +; GISEL-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> ret <8 x i8> %tmp2 @@ -409,11 +409,11 @@ ; CHECK-NEXT: rev32.4h v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev32D16: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev32.4h v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev32D16: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev32.4h v0, v0 +; GISEL-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = shufflevector <4 x i16> %tmp1, <4 x i16> undef, <4 x i32> ret <4 x i16> %tmp2 @@ -428,8 +428,11 @@ ; ; GISEL-LABEL: test_vrev32Q8: ; GISEL: // %bb.0: -; GISEL: tbl.16b v0, { v0, v1 }, v2 -; GISEL: ret +; GISEL-NEXT: adrp x8, .LCPI21_0 +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI21_0] +; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 +; GISEL-NEXT: ret %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> ret <16 x i8> %tmp2 @@ -444,8 +447,11 @@ ; ; GISEL-LABEL: test_vrev32Q16: ; GISEL: // %bb.0: -; GISEL: tbl.16b v0, { v0, v1 }, v2 -; GISEL: ret +; GISEL-NEXT: adrp x8, .LCPI22_0 +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI22_0] +; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 +; GISEL-NEXT: ret %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> ret <8 x i16> %tmp2 @@ -458,11 +464,11 @@ ; CHECK-NEXT: rev16.8b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev16D8: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev16.8b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev16D8: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev16.8b v0, v0 +; GISEL-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> ret <8 x i8> %tmp2 @@ -477,8 +483,11 @@ ; ; GISEL-LABEL: test_vrev16Q8: ; GISEL: // %bb.0: -; GISEL: tbl.16b v0, { v0, v1 }, v2 -; GISEL: ret +; GISEL-NEXT: adrp x8, .LCPI24_0 +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI24_0] +; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 +; GISEL-NEXT: ret %tmp1 = load <16 x i8>, <16 x i8>* %A %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <16 x i32> ret <16 x i8> %tmp2 @@ -493,11 +502,11 @@ ; CHECK-NEXT: rev64.8b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64D8_undef: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: ldr d0, [x0] -; FALLBACK-NEXT: rev64.8b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64D8_undef: +; GISEL: // %bb.0: +; GISEL-NEXT: ldr d0, [x0] +; GISEL-NEXT: rev64.8b v0, v0 +; GISEL-NEXT: ret %tmp1 = load <8 x i8>, <8 x i8>* %A %tmp2 = shufflevector <8 x i8> %tmp1, <8 x i8> undef, <8 x i32> ret <8 x i8> %tmp2 @@ -512,8 +521,11 @@ ; ; GISEL-LABEL: test_vrev32Q16_undef: ; GISEL: // %bb.0: -; GISEL: tbl.16b v0, { v0, v1 }, v2 -; GISEL: ret +; GISEL-NEXT: adrp x8, .LCPI26_0 +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI26_0] +; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 +; GISEL-NEXT: ret %tmp1 = load <8 x i16>, <8 x i16>* %A %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <8 x i32> ret <8 x i16> %tmp2 @@ -529,13 
+541,13 @@ ; CHECK-NEXT: st1.h { v0 }[6], [x1] ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev64: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: ldr q0, [x0] -; FALLBACK-NEXT: add x8, x1, #2 // =2 -; FALLBACK-NEXT: st1.h { v0 }[5], [x8] -; FALLBACK-NEXT: st1.h { v0 }[6], [x1] -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: add x8, x1, #2 // =2 +; GISEL-NEXT: st1.h { v0 }[5], [x8] +; GISEL-NEXT: st1.h { v0 }[6], [x1] +; GISEL-NEXT: ret entry: %0 = bitcast <4 x i16>* %source to <8 x i16>* %tmp2 = load <8 x i16>, <8 x i16>* %0, align 4 @@ -559,18 +571,18 @@ ; CHECK-NEXT: str q0, [x1, #176] ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: float_vrev64: -; FALLBACK: // %bb.0: // %entry -; FALLBACK-NEXT: fmov s0, wzr -; FALLBACK-NEXT: mov.s v0[1], v0[0] -; FALLBACK-NEXT: mov.s v0[2], v0[0] -; FALLBACK-NEXT: adrp x8, .LCPI28_0 -; FALLBACK-NEXT: mov.s v0[3], v0[0] -; FALLBACK-NEXT: ldr q1, [x0] -; FALLBACK-NEXT: ldr q2, [x8, :lo12:.LCPI28_0] -; FALLBACK-NEXT: tbl.16b v0, { v0, v1 }, v2 -; FALLBACK-NEXT: str q0, [x1, #176] -; FALLBACK-NEXT: ret +; GISEL-LABEL: float_vrev64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: fmov s0, wzr +; GISEL-NEXT: mov.s v0[1], v0[0] +; GISEL-NEXT: mov.s v0[2], v0[0] +; GISEL-NEXT: adrp x8, .LCPI28_0 +; GISEL-NEXT: mov.s v0[3], v0[0] +; GISEL-NEXT: ldr q1, [x0] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI28_0] +; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 +; GISEL-NEXT: str q0, [x1, #176] +; GISEL-NEXT: ret entry: %0 = bitcast float* %source to <4 x float>* %tmp2 = load <4 x float>, <4 x float>* %0, align 4 @@ -587,10 +599,10 @@ ; CHECK-NEXT: rev32.16b v0, v0 ; CHECK-NEXT: ret ; -; FALLBACK-LABEL: test_vrev32_bswap: -; FALLBACK: // %bb.0: -; FALLBACK-NEXT: rev32.16b v0, v0 -; FALLBACK-NEXT: ret +; GISEL-LABEL: test_vrev32_bswap: +; GISEL: // %bb.0: +; GISEL-NEXT: rev32.16b v0, v0 +; GISEL-NEXT: ret %bswap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %source) ret <4 x i32> %bswap }
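For context, the source-level pattern this selection targets looks like the sketch below. It is illustrative C++ only, not taken from this patch; bit22 and bit22_64 are made-up names, and the exact registers depend on the calling convention and surrounding code. With this change applied, GlobalISel should select the same single unsigned bitfield extract that SelectionDAG already forms via isBitfieldExtractOpFromAnd.

// 32-bit case, mirroring and_to_ubfm_s32: expected to select
// UBFMWri %copy, 22, 22, which disassembles as  ubfx w0, w0, #22, #1.
unsigned bit22(unsigned x) { return (x >> 22) & 1u; }

// 64-bit case, mirroring and_to_ubfm_s64: expected to select
// UBFMXri %copy, 22, 22, i.e.  ubfx x0, x0, #22, #1.
unsigned long long bit22_64(unsigned long long x) { return (x >> 22) & 1u; }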