diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h
--- a/llvm/include/llvm/IR/Instructions.h
+++ b/llvm/include/llvm/IR/Instructions.h
@@ -1060,12 +1060,12 @@
                                    Ptr->getType()->getPointerAddressSpace());
     // Vector GEP
     if (Ptr->getType()->isVectorTy()) {
-      unsigned NumElem = Ptr->getType()->getVectorNumElements();
+      ElementCount NumElem = Ptr->getType()->getVectorElementCount();
       return VectorType::get(PtrTy, NumElem);
     }
     for (Value *Index : IdxList)
       if (Index->getType()->isVectorTy()) {
-        unsigned NumElem = Index->getType()->getVectorNumElements();
+        ElementCount NumElem = Index->getType()->getVectorElementCount();
         return VectorType::get(PtrTy, NumElem);
       }
     // Scalar GEP
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5252,7 +5252,8 @@
     // amounts. This catches things like trying to shift an i1024 value by an
     // i8, which is easy to fall into in generic code that uses
     // TLI.getShiftAmount().
-    assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) &&
+    assert(N2.getScalarValueSizeInBits() >=
+               Log2_32_Ceil(N1.getScalarValueSizeInBits()) &&
            "Invalid use of small shift amount with oversized value!");
 
     // Always fold shifts of i1 values so the code generator doesn't need to
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3876,13 +3876,17 @@
   // Normalize Vector GEP - all scalar operands should be converted to the
   // splat vector.
-  unsigned VectorWidth = I.getType()->isVectorTy() ?
-    I.getType()->getVectorNumElements() : 0;
+  bool IsVectorGEP = I.getType()->isVectorTy();
+  ElementCount VectorElementCount = IsVectorGEP ?
+    I.getType()->getVectorElementCount() : ElementCount(0, false);
 
-  if (VectorWidth && !N.getValueType().isVector()) {
+  if (IsVectorGEP && !N.getValueType().isVector()) {
     LLVMContext &Context = *DAG.getContext();
-    EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
-    N = DAG.getSplatBuildVector(VT, dl, N);
+    EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorElementCount);
+    if (VectorElementCount.Scalable)
+      N = DAG.getSplatVector(VT, dl, N);
+    else
+      N = DAG.getSplatBuildVector(VT, dl, N);
   }
 
   for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
        GTI != E; ++GTI) {
@@ -3904,9 +3908,16 @@
         N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
                         DAG.getConstant(Offset, dl, N.getValueType()), Flags);
       }
     } else {
+      // IdxSize is the width of the arithmetic according to IR semantics.
+      // In SelectionDAG, we may prefer to do arithmetic in a wider bitwidth
+      // (and fix up the result later).
      unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
      MVT IdxTy = MVT::getIntegerVT(IdxSize);
-      APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
+      TypeSize ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+      // We intentionally mask away the high bits here; ElementSize may not
+      // fit in IdxTy.
+      APInt ElementMul(IdxSize, ElementSize.getKnownMinSize());
+      bool ElementScalable = ElementSize.isScalable();
       // If this is a scalar constant or a splat vector of constants,
       // handle it quickly.
@@ -3914,14 +3925,18 @@
       if (C && isa<VectorType>(C->getType()))
         C = C->getSplatValue();
 
-      if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
-        if (CI->isZero())
-          continue;
-        APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
+      const auto *CI = dyn_cast_or_null<ConstantInt>(C);
+      if (CI && CI->isZero())
+        continue;
+      if (CI && !ElementScalable) {
+        APInt Offs = ElementMul * CI->getValue().sextOrTrunc(IdxSize);
         LLVMContext &Context = *DAG.getContext();
-        SDValue OffsVal = VectorWidth ?
-          DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
-          DAG.getConstant(Offs, dl, IdxTy);
+        SDValue OffsVal;
+        if (IsVectorGEP)
+          OffsVal = DAG.getConstant(
+              Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorElementCount));
+        else
+          OffsVal = DAG.getConstant(Offs, dl, IdxTy);
 
         // In an inbounds GEP with an offset that is nonnegative even when
         // interpreted as signed, assume there is no unsigned overflow.
@@ -3935,12 +3950,16 @@
         continue;
       }
 
-      // N = N + Idx * ElementSize;
+      // N = N + Idx * ElementMul;
       SDValue IdxN = getValue(Idx);
 
-      if (!IdxN.getValueType().isVector() && VectorWidth) {
-        EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
-        IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
+      if (!IdxN.getValueType().isVector() && IsVectorGEP) {
+        EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(),
+                                  VectorElementCount);
+        if (VectorElementCount.Scalable)
+          IdxN = DAG.getSplatVector(VT, dl, IdxN);
+        else
+          IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
       }
 
       // If the index is smaller or larger than intptr_t, truncate or extend
@@ -3949,19 +3968,27 @@
 
       // If this is a multiply by a power of two, turn it into a shl
       // immediately. This is a very common case.
-      if (ElementSize != 1) {
-        if (ElementSize.isPowerOf2()) {
-          unsigned Amt = ElementSize.logBase2();
+      if (ElementMul != 1) {
+        if (ElementMul.isPowerOf2()) {
+          unsigned Amt = ElementMul.logBase2();
           IdxN = DAG.getNode(ISD::SHL, dl,
                              N.getValueType(), IdxN,
                              DAG.getConstant(Amt, dl, IdxN.getValueType()));
         } else {
-          SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
+          SDValue Scale = DAG.getConstant(ElementMul.getZExtValue(), dl,
                                           IdxN.getValueType());
           IdxN = DAG.getNode(ISD::MUL, dl,
                              N.getValueType(), IdxN, Scale);
         }
       }
+      if (ElementScalable) {
+        EVT VScaleTy = N.getValueType().getScalarType();
+        SDValue VScale = DAG.getNode(
+            ISD::VSCALE, dl, VScaleTy, DAG.getConstant(1, dl, VScaleTy));
+        if (IsVectorGEP)
+          VScale = DAG.getSplatVector(N.getValueType(), dl, VScale);
+        IdxN = DAG.getNode(ISD::MUL, dl, N.getValueType(), IdxN, VScale);
+      }
 
       N = DAG.getNode(ISD::ADD, dl,
                       N.getValueType(), N, IdxN);
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -985,6 +985,8 @@
   defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", null_frag>;
   defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", null_frag>;
 
+  def : Pat<(nxv2i64 (shl nxv2i64:$Zdn, nxv2i64:$Zm)), (LSL_ZPmZ_D (PTRUE_D 31), $Zdn, $Zm)>;
+
   defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
   defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
   defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-gep.ll
@@ -0,0 +1,103 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: fixed_of_scalable_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    dup v0.2d, x0
+; CHECK-NEXT:    shl v1.2d, v1.2d, #4
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <2 x i64> <i64 1, i64 1>
+  ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <2 x <vscale x 2 x i64>*> @fixed_of_scalable_2(<2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: fixed_of_scalable_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    rdvl x8, #1
+; CHECK-NEXT:    lsr x8, x8, #4
+; CHECK-NEXT:    dup v1.2d, x8
+; CHECK-NEXT:    shl v1.2d, v1.2d, #4
+; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    ret
+  %d = getelementptr <vscale x 2 x i64>, <2 x <vscale x 2 x i64>*> %base, <2 x i64> <i64 1, i64 1>
+  ret <2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_1(i8* %base) {
+; CHECK-LABEL: scalable_of_fixed_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov z0.d, x0
+; CHECK-NEXT:    add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr i8, i8* %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_2(<vscale x 2 x i8*> %base) {
+; CHECK-LABEL: scalable_of_fixed_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add z0.d, z0.d, #1 // =0x1
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr i8, <vscale x 2 x i8*> %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x i8*> @scalable_of_fixed_3(i8* %base, <vscale x 2 x i32> %idx) {
+; CHECK-LABEL: scalable_of_fixed_3:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sxtw z0.d, p0/m, z0.d
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    add z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %d = getelementptr i8, i8* %base, <vscale x 2 x i32> %idx
+  ret <vscale x 2 x i8*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_1(<vscale x 2 x i64>* %base) {
+; CHECK-LABEL: scalable_of_scalable_1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w9, #4
+; CHECK-NEXT:    rdvl x10, #1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z0.d, x8
+; CHECK-NEXT:    mov z1.d, x9
+; CHECK-NEXT:    lsr x8, x10, #4
+; CHECK-NEXT:    lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT:    mov z1.d, x0
+; CHECK-NEXT:    add z0.d, z1.d, z0.d
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x i64>* %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
+
+define <vscale x 2 x <vscale x 2 x i64>*> @scalable_of_scalable_2(<vscale x 2 x <vscale x 2 x i64>*> %base) {
+; CHECK-LABEL: scalable_of_scalable_2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #1
+; CHECK-NEXT:    mov w9, #4
+; CHECK-NEXT:    rdvl x10, #1
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    mov z1.d, x8
+; CHECK-NEXT:    mov z2.d, x9
+; CHECK-NEXT:    lsr x8, x10, #4
+; CHECK-NEXT:    lsl z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    mov z2.d, x8
+; CHECK-NEXT:    mul z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %idx = shufflevector <vscale x 2 x i64> insertelement (<vscale x 2 x i64> undef, i64 1, i32 0), <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> zeroinitializer
+  %d = getelementptr <vscale x 2 x i64>, <vscale x 2 x <vscale x 2 x i64>*> %base, <vscale x 2 x i64> %idx
+  ret <vscale x 2 x <vscale x 2 x i64>*> %d
+}
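A brief usage sketch, not taken from the patch itself: with the SelectionDAGBuilder change above, a GEP whose source element type is scalable is lowered as base + index * (known-minimum element size * vscale), with vscale coming from the new ISD::VSCALE node; that is the rdvl/lsr/shl arithmetic visible in the CHECK lines of sve-gep.ll. The IR below is a hypothetical extra case in the same style (the function name and the <vscale x 4 x i32> element type are illustrative assumptions, not part of the test file); it can be fed to the same command as the RUN line above, llc -mtriple=aarch64-linux-gnu -mattr=+sve.

; Hypothetical example: advance a pointer to a scalable vector by a scalar
; index. <vscale x 4 x i32> has a known-minimum allocation size of 16 bytes,
; so the offset is expected to be computed as %idx * (16 * vscale).
define <vscale x 4 x i32>* @hypothetical_scalable_gep(<vscale x 4 x i32>* %base, i64 %idx) {
  %d = getelementptr <vscale x 4 x i32>, <vscale x 4 x i32>* %base, i64 %idx
  ret <vscale x 4 x i32>* %d
}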