Index: include/llvm/CodeGen/MachineOperand.h
===================================================================
--- include/llvm/CodeGen/MachineOperand.h
+++ include/llvm/CodeGen/MachineOperand.h
@@ -919,6 +919,16 @@
     assert(isReg() && "Can only add reg operand to use lists");
     return Contents.Reg.Prev != nullptr;
   }
+
+public:
+  /// Returns true if this register operand's use-list successor itself has a
+  /// successor, i.e. at least two further operands follow this one on the
+  /// register's use list. Used to detect whether a register has additional
+  /// uses beyond the instruction currently being inspected.
+  bool isOnRegUseListNext() const {
+    assert(isReg() && "Can only query the use list of a reg operand");
+    return Contents.Reg.Next && Contents.Reg.Next->Contents.Reg.Next;
+  }
 };
 
 template <> struct DenseMapInfo {
Index: lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
===================================================================
--- lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -50,6 +50,7 @@
     "Number of load/store from unscaled generated");
 STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
 STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
+STATISTIC(NumAddressComputation, "Number of address computations folded");
 
 // The LdStLimit limits how far we search for load/store pairs.
 static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
@@ -158,6 +159,12 @@
   mergeUpdateInsn(MachineBasicBlock::iterator I,
                   MachineBasicBlock::iterator Update, bool IsPreIdx);
 
+  // Fold a shifted-register ADD (ADD[WX]rs) into the addressing mode of a
+  // load/store, producing a register-offset (ro[WX]) load/store.
+  MachineBasicBlock::iterator
+  mergeAddWithLDSTInstruction(MachineBasicBlock::iterator I,
+                              MachineBasicBlock::iterator Update,
+                              bool IsPreIdx);
+
   // Find and merge zero store instructions.
   bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
 
@@ -1373,12 +1380,155 @@
   return NextI;
 }
 
+// Returns true if the ADD and the load/store can be folded together.
+// E.g.:  add x8, x9, x8, lsl #3
+//        str xzr, [x8]
+// can be merged into
+//        str xzr, [x9, x8, lsl #3]     (STRXroX)
+// Both instructions must operate on the same register width ('X' or 'W').
+static bool isAddrFoldableInst(MachineBasicBlock::iterator Update,
+                               MachineBasicBlock::iterator I) {
+  unsigned IOpc = I->getOpcode();
+  switch (Update->getOpcode()) {
+  default:
+    return false;
+  case AArch64::ADDXrs:
+    // A 64-bit ADD folds only into a 64-bit load/store.
+    return IOpc == AArch64::STRXui || IOpc == AArch64::LDRXui;
+  case AArch64::ADDWrs:
+    // A 32-bit ADD folds only into a 32-bit load/store.
+    return IOpc == AArch64::STRWui || IOpc == AArch64::LDRWui;
+  }
+}
+
+// Returns the AArch64 register-offset opcode for the merged (new)
+// instruction, or 0 if this pair of instructions has no register-offset form.
+static unsigned getTargetOpcodeForFoldInst(MachineBasicBlock::iterator Update,
+                                           MachineBasicBlock::iterator I) {
+  unsigned UpdateOpc = Update->getOpcode();
+  if (UpdateOpc == AArch64::ADDXrs || UpdateOpc == AArch64::ADDWrs) {
+    switch (I->getOpcode()) {
+    default:
+      break;
+    case AArch64::STRXui:
+      return AArch64::STRXroX;
+    case AArch64::STRWui:
+      return AArch64::STRWroX;
+    case AArch64::LDRXui:
+      return AArch64::LDRXroX;
+    case AArch64::LDRWui:
+      return AArch64::LDRWroX;
+    }
+  }
+  return 0;
+}
+
+// Merge an ADD and a STR/LDR into a single register-offset load/store and
+// build the new instruction.
+MachineBasicBlock::iterator
+AArch64LoadStoreOpt::mergeAddWithLDSTInstruction(MachineBasicBlock::iterator I,
+                                                 MachineBasicBlock::iterator Update,
+                                                 bool IsPreIdx) {
+  MachineBasicBlock::iterator NextI = I;
+  // Return the instruction following the merged instruction, which is
+  // the instruction following our unmerged store. Unless that's the add/sub
+  // instruction we're merging, in which case it's the one after that.
+  if (++NextI == Update)
+    ++NextI;
+  // Shift amount taken from the ADD's operand 3.
+  // NOTE(review): for ADD[WX]rs this immediate encodes shift type *and*
+  // amount; only a plain LSL makes the raw value equal the amount — confirm
+  // the matcher only accepts LSL.
+  int Value = Update->getOperand(3).getImm();
+  // Scale of the memory access relative to the shift amount.
+  // NOTE(review): divides by Value — a zero shift would divide by zero;
+  // confirm callers never accept a zero shift amount.
+  unsigned int ScaleVal = getMemScale(*I) / Value;
+  // Opcode of the merged register-offset instruction (0 if no mapping).
+  unsigned Opc = getTargetOpcodeForFoldInst(Update, I);
+  // Clear kill flags on the ADD's source registers: they remain live as the
+  // base/offset of the merged instruction (and in the ADD if it is kept
+  // below instead of being erased).
+  Update->getOperand(1).setIsKill(false);
+  Update->getOperand(2).setIsKill(false);
+
+  MachineInstrBuilder MIB;
+  if (!isPairedLdSt(*I)) {
+    // Non-paired instruction.
+    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(Opc))
+              .add(I->getOperand(0))
+              .add(Update->getOperand(1))
+              .add(Update->getOperand(2))
+              .addImm(0)
+              .addImm(Value)
+              .setMemRefs(I->memoperands())
+              .setMIFlags(I->mergeFlagsWith(*Update));
+  } else {
+    // Paired instruction.
+    // NOTE(review): getTargetOpcodeForFoldInst maps no paired opcodes, so
+    // Opc is 0 on this path — confirm this branch is actually reachable.
+    MIB = BuildMI(*I->getParent(), I, I->getDebugLoc(), TII->get(Opc))
+              .add(I->getOperand(0))
+              .add(getLdStRegOp(*Update, 0))
+              .add(getLdStRegOp(*Update, 1))
+              .add(Update->getOperand(2))
+              .addImm(0)
+              .addImm(ScaleVal)
+              .setMemRefs(I->memoperands())
+              .setMIFlags(I->mergeFlagsWith(*I));
+  }
+
+  ++NumAddressComputation;
+  LLVM_DEBUG(dbgs() << "Creating Address Computation.");
+  LLVM_DEBUG(dbgs() << " Replacing instructions:\n ");
+  LLVM_DEBUG(I->print(dbgs()));
+  LLVM_DEBUG(dbgs() << " ");
+  LLVM_DEBUG(Update->print(dbgs()));
+  LLVM_DEBUG(dbgs() << " with instruction:\n ");
+  LLVM_DEBUG(((MachineInstr *)MIB)->print(dbgs()));
+  LLVM_DEBUG(dbgs() << "\n");
+
+  // If the base register (the load/store's operand 1) still has further uses,
+  // the ADD must be kept: move it below the merged instruction. Otherwise it
+  // is dead and can be erased.
+
+  if(I->getOperand(1).isOnRegUseListNext()) {
+    MachineBasicBlock *MBB = I->getParent();
+    MBB->splice(I, MBB, &*Update);
+  }
+  else {
+    Update->eraseFromParent();
+  }
+  // Erase the old instructions for the block.
+  I->eraseFromParent();
+  return NextI;
+}
+
 bool AArch64LoadStoreOpt::isMatchingUpdateInsn(MachineInstr &MemMI,
                                                MachineInstr &MI,
                                                unsigned BaseReg, int Offset) {
   switch (MI.getOpcode()) {
   default:
     break;
+  case AArch64::ADDWrs:
+  case AArch64::ADDXrs: {
+    if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isReg() ||
+        !MI.getOperand(3).isImm())
+      break;
+    // Operand 3 is a shifter immediate encoding (type << 6) | amount.  Only
+    // a plain LSL (type 0, so the raw value equals the amount) whose amount
+    // matches the access size can become a register-offset load/store.  A
+    // zero amount is rejected because the merge code scales by it.
+    int64_t Imm = MI.getOperand(3).getImm();
+    if (Imm <= 0 || Imm > 4 || (1LL << Imm) != getMemScale(MemMI))
+      break;
+    // The ADD must define the load/store's base register, and the load/store
+    // must not carry an immediate offset.
+    if (MI.getOperand(0).getReg() != BaseReg || Offset != 0)
+      break;
+    return true;
+  }
   case AArch64::SUBXri:
   case AArch64::ADDXri:
     // Make sure it's a vanilla immediate operand, not a relocation or
@@ -1633,9 +1783,13 @@
     // ldr x0, [x20], #32
     Update = findMatchingUpdateInsnForward(MBBI, 0, UpdateLimit);
     if (Update != E) {
-      // Merge the update into the ld/st.
-      MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
-      return true;
+      // Only immediate add/sub updates can be merged as a post-index form;
+      // the shifted-register ADDs matched above are handled pre-index only.
+      if (Update->getOpcode() == AArch64::ADDXri ||
+          Update->getOpcode() == AArch64::SUBXri) {
+        // Merge the update into the ld/st.
+        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/false);
+        return true;
+      }
     }
 
     // Don't know how to handle unscaled pre/post-index versions below, so bail.
@@ -1643,15 +1797,28 @@
     return false;
 
   // Look back to try to find a pre-index instruction. For example,
-  // add x0, x0, #8
-  // ldr x1, [x0]
-  // merged into:
-  // ldr x1, [x0, #8]!
   Update = findMatchingUpdateInsnBackward(MBBI, UpdateLimit);
   if (Update != E) {
-    // Merge the update into the ld/st.
-    MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
-    return true;
+    if (isAddrFoldableInst(Update, MBBI)) {
+      // A shifted-register ADD can fold into a register-offset form:
+      //   add x8, x9, x8, lsl #3
+      //   str xzr, [x8]
+      // merged into:
+      //   str xzr, [x9, x8, lsl #3]
+      MBBI = mergeAddWithLDSTInstruction(MBBI, Update, /*IsPreIdx=*/true);
+      return true;
+    }
+    if (Update->getOpcode() == AArch64::ADDXri ||
+        Update->getOpcode() == AArch64::SUBXri) {
+      // An immediate add/sub becomes a pre-index update:
+      //   add x0, x0, #8
+      //   ldr x1, [x0]
+      // merged into:
+      //   ldr x1, [x0, #8]!
+      MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+      return true;
+    }
   }
 
   // The immediate in the load/store is scaled by the size of the memory
@@ -1666,9 +1833,12 @@
     // ldr x1, [x0, #64]!
     Update = findMatchingUpdateInsnForward(MBBI, UnscaledOffset, UpdateLimit);
     if (Update != E) {
-      // Merge the update into the ld/st.
-      MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
-      return true;
+      // Only immediate add/sub updates can be merged here.
+      if (Update->getOpcode() == AArch64::ADDXri ||
+          Update->getOpcode() == AArch64::SUBXri) {
+        // Merge the update into the ld/st.
+        MBBI = mergeUpdateInsn(MBBI, Update, /*IsPreIdx=*/true);
+        return true;
+      }
     }
 
     return false;
Index: test/CodeGen/AArch64/fold_addressing_modes_aarch64.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/fold_addressing_modes_aarch64.ll
@@ -0,0 +1,47 @@
+; RUN: llc -o - %s -mtriple=aarch64-arm-none-eabi -verify-machineinstrs | FileCheck %s
+; ModuleID = './test_51309.c'
source_filename = "./test_51309.c"
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-arm-none-eabi"
+
+%struct.As = type { i32, i32 }
+%struct.Bs = type { i16 }
+
+@A = external dso_local local_unnamed_addr global [4 x %struct.As], align 4
+@B = external dso_local local_unnamed_addr global %struct.Bs*, align 8
+
+; Function Attrs: minsize norecurse nounwind optsize
+
+; The add computing &A[idx] must be folded into the store's addressing mode
+; (register offset with lsl #3), leaving no standalone add.
+; CHECK-LABEL: test:
+; CHECK: adrp
+; CHECK: ldr
+; CHECK-NEXT: ldrsh
+; CHECK-NOT: add
+; CHECK: str xzr, [x{{[0-9]+}}, x{{[0-9]+}}, lsl #3]
+define dso_local void @test() local_unnamed_addr #0 {
+  %1 = load %struct.Bs*, %struct.Bs** @B, align 8, !tbaa !2
+  %2 = getelementptr inbounds %struct.Bs, %struct.Bs* %1, i64 0, i32 0
+  %3 = load i16, i16* %2, align 2, !tbaa !6
+  %4 = sext i16 %3 to i64
+  %5 = getelementptr inbounds [4 x %struct.As], [4 x %struct.As]* @A, i64 0, i64 %4, i32 0
+  %6 = bitcast i32* %5 to <2 x i32>*
+  store <2 x i32> zeroinitializer, <2 x i32>* %6, align 4, !tbaa !9
+  ret void
+}
+
+attributes #0 = { minsize norecurse nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a53" "target-features"="+aes,+crc,+crypto,+fp-armv8,+neon,+sha2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 9.0.0 (https://git.llvm.org/git/clang.git/ 268b249f1d4cbc212d1853ac9821194f868eef36) (https://git.llvm.org/git/llvm.git/ 772398facdeaf5e5f4f8ca641e06f354441ad9ac)"}
+; TBAA type descriptors referenced by the !tbaa tags on the loads/stores in
+; @test above (any-pointer, short member of %struct.Bs, and int).
+!2 = !{!3, !3, i64 0}
+!3 = !{!"any pointer", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !8, i64 0}
+!7 = !{!"Bs", !8, i64 0}
+!8 = !{!"short", !4, i64 0}
+!9 = !{!10, !10, i64 0}
+!10 = !{!"int", !4, i64 0}