Index: llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
===================================================================
--- llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -88,6 +88,10 @@
   void processPHIs(MachineFunction &MF);
 
   bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+  /// Try to select shift instruction \p I as a variable shift instruction
+  /// when part of the shift-amount computation can be folded away.
+  bool earlySelectShiftAmountMod(MachineInstr &I,
+                                 MachineRegisterInfo &MRI) const;
 
   /// Eliminate same-sized cross-bank copies into stores before selectImpl().
   bool contractCrossBankCopyIntoStore(MachineInstr &I,
@@ -1670,6 +1674,78 @@
   return true;
 }
 
+bool AArch64InstructionSelector::earlySelectShiftAmountMod(
+    MachineInstr &I, MachineRegisterInfo &MRI) const {
+  // Given a situation like this:
+  //
+  // %shiftamt = G_BINOP %something, %cst
+  // %x = G_SHIFT %reg, %shiftamt
+  //
+  // It may be possible to fold the binop into the shift and select a
+  // variable shift instruction directly.
+  unsigned GenericOpc = I.getOpcode();
+  assert((GenericOpc == TargetOpcode::G_SHL ||
+          GenericOpc == TargetOpcode::G_ASHR ||
+          GenericOpc == TargetOpcode::G_LSHR) &&
+         "Unexpected opcode?");
+  MachineIRBuilder MIB(I);
+  MachineOperand &DstOp = I.getOperand(0);
+  LLT Ty = MRI.getType(DstOp.getReg());
+  unsigned Size = Ty.getSizeInBits();
+  if (Ty.isVector() || !(Size == 32 || Size == 64))
+    return false;
+
+  // Look through G_TRUNC, G_ZEXT, and G_ANYEXT of the shift amount.
+  Register ShiftAmt = I.getOperand(2).getReg();
+  mi_match(ShiftAmt, MRI,
+           m_any_of(m_GZExt(m_Reg(ShiftAmt)), m_GAnyExt(m_Reg(ShiftAmt)),
+                    m_GTrunc(m_Reg(ShiftAmt))));
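+  // All of these preserve the low bits of the shift amount, which are the
+  // only bits the variable shift instructions read. G_SEXT is not looked
+  // through here; see the dont_walk_past_sext test.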
+
+  // TODO: Handle G_ADD and G_SUB.
+  //
+  // Try to match a situation like this:
+  //
+  //   %cst = G_CONSTANT i32 31
+  //   %shiftamt = G_AND %and_lhs, %cst
+  //   %x = G_SHL %shl_lhs, %shiftamt
+  //
+  // We can omit the G_AND entirely and emit just:
+  //
+  //   %x = LSLVWr %shl_lhs, %and_lhs
+  //
+  // We can do this because LSLVWr only reads the shift amount from the bottom
+  // 5 bits of its second operand. The mask here, 31 (0b11111), cannot change
+  // those bottom 5 bits, so the G_AND has no effect on the shift.
+  MachineInstr *ShiftAmtDef = getOpcodeDef(TargetOpcode::G_AND, ShiftAmt, MRI);
+  if (!ShiftAmtDef)
+    return false;
+  int64_t MaskImm;
+  if (!mi_match(ShiftAmtDef->getOperand(2).getReg(), MRI, m_ICst(MaskImm)))
+    return false;
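+  // The W-form shift instructions only read the low 5 bits of the amount
+  // register, and the X-form instructions only read the low 6. Dropping the
+  // G_AND is only safe when the mask's low 5 (or 6) bits are all ones.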
+  uint64_t Bits = Size == 32 ? 5 : 6;
+  if (countTrailingOnes(static_cast<unsigned>(MaskImm)) < Bits)
+    return false;
+
+  // Narrow/widen the shift amount to match the size of the shift operation.
+  Register NewShiftAmt = ShiftAmtDef->getOperand(1).getReg();
+  NewShiftAmt = Size == 32 ? narrowExtendRegIfNeeded(NewShiftAmt, MIB)
+                           : widenGPRBankRegIfNeeded(NewShiftAmt, 64, MIB);
+
+  const unsigned OpcTable[3][2] = {{AArch64::LSLVWr, AArch64::LSLVXr},
+                                   {AArch64::ASRVWr, AArch64::ASRVXr},
+                                   {AArch64::LSRVWr, AArch64::LSRVXr}};
+  unsigned ShiftIdx = GenericOpc == TargetOpcode::G_SHL
+                          ? 0
+                          : GenericOpc == TargetOpcode::G_ASHR ? 1 : 2;
+  unsigned NewOpc = OpcTable[ShiftIdx][Size == 64];
+  auto NewInst =
+      MIB.buildInstr(NewOpc, {DstOp}, {I.getOperand(1), NewShiftAmt});
+  I.eraseFromParent();
+  constrainSelectedInstRegOperands(*NewInst, TII, TRI, RBI);
+  return true;
+}
+
 bool AArch64InstructionSelector::earlySelectSHL(
     MachineInstr &I, MachineRegisterInfo &MRI) const {
   // We try to match the immediate variant of LSL, which is actually an alias
@@ -1768,7 +1844,12 @@
     return true;
   }
   case TargetOpcode::G_SHL:
-    return earlySelectSHL(I, MRI);
+    if (earlySelectSHL(I, MRI))
+      return true;
+    LLVM_FALLTHROUGH;
+  case TargetOpcode::G_ASHR:
+  case TargetOpcode::G_LSHR:
+    return earlySelectShiftAmountMod(I, MRI);
   case TargetOpcode::G_CONSTANT: {
     bool IsZero = false;
     if (I.getOperand(1).isCImm())
Index: llvm/test/CodeGen/AArch64/GlobalISel/opt-shift-amount-mod.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/GlobalISel/opt-shift-amount-mod.mir
@@ -0,0 +1,306 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -verify-machineinstrs -mtriple aarch64-unknown-unknown -global-isel -run-pass=instruction-select %s -o - | FileCheck %s
+
+...
+---
+name:            shl_and_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: shl_and_s32
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: %shift:gpr32 = LSLVWr %shift_lhs, %binop_lhs
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s32) = G_CONSTANT i32 31
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %shift:gpr(s32) = G_SHL %shift_lhs, %binop(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            shl_and_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: shl_and_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %binop_lhs:gpr64 = COPY $x0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: %shift:gpr64 = LSLVXr %shift_lhs, %binop_lhs
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s64) = COPY $x0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s64) = G_CONSTANT i64 63
+    %binop:gpr(s64) = G_AND %binop_lhs, %cst
+    %shift:gpr(s64) = G_SHL %shift_lhs, %binop(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            ashr_and_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: ashr_and_s32
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: %shift:gpr32 = ASRVWr %shift_lhs, %binop_lhs
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s32) = G_CONSTANT i32 31
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %shift:gpr(s32) = G_ASHR %shift_lhs, %binop(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            ashr_and_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: ashr_and_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %binop_lhs:gpr64 = COPY $x0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: %shift:gpr64 = ASRVXr %shift_lhs, %binop_lhs
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s64) = COPY $x0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s64) = G_CONSTANT i64 63
+    %binop:gpr(s64) = G_AND %binop_lhs, %cst
+    %shift:gpr(s64) = G_ASHR %shift_lhs, %binop(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            lshr_and_s32
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; CHECK-LABEL: name: lshr_and_s32
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: %shift:gpr32 = LSRVWr %shift_lhs, %binop_lhs
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s32) = G_CONSTANT i32 31
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %shift:gpr(s32) = G_LSHR %shift_lhs, %binop(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            lshr_and_s64
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $x0, $x1
+    ; CHECK-LABEL: name: lshr_and_s64
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %binop_lhs:gpr64 = COPY $x0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: %shift:gpr64 = LSRVXr %shift_lhs, %binop_lhs
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s64) = COPY $x0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s64) = G_CONSTANT i64 63
+    %binop:gpr(s64) = G_AND %binop_lhs, %cst
+    %shift:gpr(s64) = G_LSHR %shift_lhs, %binop(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            bad_and_mask
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; The mask (12 = 0b1100) can change the bottom 5 bits read by LSLVWr, so
+    ; the G_AND must not be folded away.
+    ;
+    ; CHECK-LABEL: name: bad_and_mask
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: %binop:gpr32common = ANDWri %binop_lhs, 1921
+    ; CHECK: %shift:gpr32 = LSLVWr %shift_lhs, %binop
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s32) = G_CONSTANT i32 12
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %shift:gpr(s32) = G_SHL %shift_lhs, %binop(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            walk_past_zext
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0, $x1
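+    ; Looking through the G_ZEXT is safe because it preserves the low 6 bits
+    ; read by LSLVXr, so the G_AND can still be folded away.
+    ;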
+    ; CHECK-LABEL: name: walk_past_zext
+    ; CHECK: liveins: $w0, $x1
+    ; CHECK: %binop_lhs:gpr32all = COPY $w0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %binop_lhs, %subreg.sub_32
+    ; CHECK: %shift:gpr64 = LSLVXr %shift_lhs, [[SUBREG_TO_REG]]
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s32) = G_CONSTANT i32 63
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %ext_and:gpr(s64) = G_ZEXT %binop(s32)
+    %shift:gpr(s64) = G_SHL %shift_lhs, %ext_and(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            walk_past_anyext
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0, $x1
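+    ; G_ANYEXT leaves the low 32 bits of its source unchanged, so it can be
+    ; looked through just like G_ZEXT.
+    ;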
+    ; CHECK-LABEL: name: walk_past_anyext
+    ; CHECK: liveins: $w0, $x1
+    ; CHECK: %binop_lhs:gpr32all = COPY $w0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %binop_lhs, %subreg.sub_32
+    ; CHECK: %shift:gpr64 = LSLVXr %shift_lhs, [[SUBREG_TO_REG]]
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s32) = G_CONSTANT i32 63
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %ext_and:gpr(s64) = G_ANYEXT %binop(s32)
+    %shift:gpr(s64) = G_SHL %shift_lhs, %ext_and(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            dont_walk_past_sext
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0, $x1
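+    ; G_SEXT is not looked through when walking to the shift amount's
+    ; definition, so the fold does not fire and the G_AND is kept.
+    ;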
+    ; CHECK-LABEL: name: dont_walk_past_sext
+    ; CHECK: liveins: $w0, $x1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr64 = COPY $x1
+    ; CHECK: %binop:gpr32common = ANDWri %binop_lhs, 5
+    ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %binop, %subreg.sub_32
+    ; CHECK: %shift:gpr64 = LSLVXr %shift_lhs, [[SUBREG_TO_REG]]
+    ; CHECK: $x0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $x0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s64) = COPY $x1
+    %cst:gpr(s32) = G_CONSTANT i32 63
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %ext_and:gpr(s64) = G_SEXT %binop(s32)
+    %shift:gpr(s64) = G_SHL %shift_lhs, %ext_and(s64)
+    $x0 = COPY %shift(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            walk_past_trunc
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $x0, $x1
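+    ; The G_TRUNC preserves the low 5 bits read by LSLVWr, so it is safe to
+    ; look through it and fold the G_AND away.
+    ;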
+    ; CHECK-LABEL: name: walk_past_trunc
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK: %binop_lhs:gpr64all = COPY $x0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: [[COPY:%[0-9]+]]:gpr32all = COPY %binop_lhs.sub_32
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32 = COPY [[COPY]]
+    ; CHECK: %shift:gpr32 = LSLVWr %shift_lhs, [[COPY1]]
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s64) = COPY $x0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s64) = G_CONSTANT i64 31
+    %binop:gpr(s64) = G_AND %binop_lhs, %cst
+    %binop_trunc:gpr(s32) = G_TRUNC %binop(s64)
+    %shift:gpr(s32) = G_SHL %shift_lhs, %binop_trunc(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            negative_value
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.1.entry:
+    liveins: $w0, $w1
+    ; An all-ones mask (-1) preserves the bottom 5 bits, so the G_AND should
+    ; still be folded away.
+    ;
+    ; CHECK-LABEL: name: negative_value
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %binop_lhs:gpr32 = COPY $w0
+    ; CHECK: %shift_lhs:gpr32 = COPY $w1
+    ; CHECK: %shift:gpr32 = LSLVWr %shift_lhs, %binop_lhs
+    ; CHECK: $w0 = COPY %shift
+    ; CHECK: RET_ReallyLR implicit $w0
+    %binop_lhs:gpr(s32) = COPY $w0
+    %shift_lhs:gpr(s32) = COPY $w1
+    %cst:gpr(s32) = G_CONSTANT i32 -1
+    %binop:gpr(s32) = G_AND %binop_lhs, %cst
+    %shift:gpr(s32) = G_SHL %shift_lhs, %binop(s32)
+    $w0 = COPY %shift(s32)
+    RET_ReallyLR implicit $w0