diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -43,8 +43,7 @@
 public:
   WebAssemblyDAGToDAGISel(WebAssemblyTargetMachine &TM,
                           CodeGenOpt::Level OptLevel)
-      : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr) {
-  }
+      : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr) {}
 
   StringRef getPassName() const override {
     return "WebAssembly Instruction Selection";
@@ -67,11 +66,19 @@
   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
                                     std::vector<SDValue> &OutOps) override;
 
+  bool SelectLoadOperands32(SDValue Op, SDValue &Offset, SDValue &Addr);
+  bool SelectLoadOperands64(SDValue Op, SDValue &Offset, SDValue &Addr);
+
 // Include the pieces autogenerated from the target description.
 #include "WebAssemblyGenDAGISel.inc"
 
 private:
   // add select functions here...
+
+  bool SelectLoadOperands(MVT AddrType, unsigned ConstOpc, SDValue N,
+                          SDValue &Offset, SDValue &Addr);
+  bool SelectLoadAddOperands(MVT OffsetType, SDValue N, SDValue &Offset,
+                             SDValue &Addr);
 };
 } // end anonymous namespace
 
@@ -281,6 +288,118 @@
   return true;
 }
 
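+// Helper for SelectLoadOperands: given a binary add or or node, try to fold
+// one operand, either a target global address or a constant, into the load's
+// static offset, returning the other operand as the dynamic address.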
+bool WebAssemblyDAGToDAGISel::SelectLoadAddOperands(MVT OffsetType, SDValue N,
+                                                    SDValue &Offset,
+                                                    SDValue &Addr) {
+  assert(N.getNumOperands() == 2 && "Attempting to fold in a non-binary op");
+
+  // Fold target global addresses in an add into the offset.
+  if (!TM.isPositionIndependent()) {
+    for (size_t i = 0; i < 2; ++i) {
+      SDValue Op = N.getOperand(i);
+      SDValue OtherOp = N.getOperand(i == 0 ? 1 : 0);
+
+      if (Op.getOpcode() == WebAssemblyISD::Wrapper)
+        Op = Op.getOperand(0);
+
+      if (Op.getOpcode() == ISD::TargetGlobalAddress) {
+        Offset = Op;
+        Addr = OtherOp;
+        return true;
+      }
+    }
+  }
+
+  // WebAssembly constant offsets are performed as unsigned with infinite
+  // precision, so we need to check for NoUnsignedWrap so that we don't fold
+  // an offset for an add that needs wrapping.
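+  // For example, if x == 4 then (add x, -4) wraps to address 0 in 32-bit
+  // arithmetic, but folding -4 into the static offset would compute the
+  // effective address as 4 + 0xFFFFFFFC without wrapping, which is out of
+  // bounds.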
+  if (N.getOpcode() == ISD::ADD && !N.getNode()->getFlags().hasNoUnsignedWrap())
+    return false;
+
+  // Fold constants in an add into the offset.
+  for (size_t i = 0; i < 2; ++i) {
+    SDValue Op = N.getOperand(i);
+    SDValue OtherOp = N.getOperand(i == 0 ? 1 : 0);
+
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
+      Offset =
+          CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), OffsetType);
+      Addr = OtherOp;
+      return true;
+    }
+  }
+  return false;
+}
+
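+// Match the static offset and dynamic address operands of a load. In order of
+// preference, fold a target global address, one operand of an add (or of an
+// 'or' that behaves like an add), or a bare constant address into the static
+// offset; otherwise use a static offset of 0 and the node itself as the
+// dynamic address. Always succeeds, so plain loads still match.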
+bool WebAssemblyDAGToDAGISel::SelectLoadOperands(MVT AddrType,
+                                                 unsigned ConstOpc, SDValue N,
+                                                 SDValue &Offset,
+                                                 SDValue &Addr) {
+  SDLoc DL(N);
+
+  // Fold target global addresses into the offset.
+  if (!TM.isPositionIndependent()) {
+    SDValue Op(N);
+    if (Op.getOpcode() == WebAssemblyISD::Wrapper)
+      Op = Op.getOperand(0);
+
+    if (Op.getOpcode() == ISD::TargetGlobalAddress) {
+      Offset = Op;
+      Addr = SDValue(
+          CurDAG->getMachineNode(ConstOpc, DL, AddrType,
+                                 CurDAG->getTargetConstant(0, DL, AddrType)),
+          0);
+      return true;
+    }
+  }
+
+  // Fold anything inside an add into the offset.
+  if (N.getOpcode() == ISD::ADD &&
+      SelectLoadAddOperands(AddrType, N, Offset, Addr))
+    return true;
+
+  // Likewise, treat an 'or' node as an 'add' if the or'ed bits are
+  // known to be zero and fold them into the offset too.
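+  // This commonly arises from address arithmetic such as (or (shl x, 3), 5),
+  // where the shift guarantees that the low bits of the first operand are
+  // zero.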
+  if (N.getOpcode() == ISD::OR) {
+    bool OrIsAdd;
+    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+      OrIsAdd =
+          CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue());
+    } else {
+      KnownBits Known0 = CurDAG->computeKnownBits(N.getOperand(0), 0);
+      KnownBits Known1 = CurDAG->computeKnownBits(N.getOperand(1), 0);
+      OrIsAdd = (~Known0.Zero & ~Known1.Zero) == 0;
+    }
+
+    if (OrIsAdd && SelectLoadAddOperands(AddrType, N, Offset, Addr))
+      return true;
+  }
+
+  // Fold constant addresses into the offset.
+  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), DL, AddrType);
+    Addr = SDValue(
+        CurDAG->getMachineNode(ConstOpc, DL, AddrType,
+                               CurDAG->getTargetConstant(0, DL, AddrType)),
+        0);
+    return true;
+  }
+
+  // Else it's a plain old load with no offset.
+  Offset = CurDAG->getTargetConstant(0, DL, AddrType);
+  Addr = N;
+  return true;
+}
+
+bool WebAssemblyDAGToDAGISel::SelectLoadOperands32(SDValue Op, SDValue &Offset,
+                                                   SDValue &Addr) {
+  return SelectLoadOperands(MVT::i32, WebAssembly::CONST_I32, Op, Offset, Addr);
+}
+
+bool WebAssemblyDAGToDAGISel::SelectLoadOperands64(SDValue Op, SDValue &Offset,
+                                                   SDValue &Addr) {
+  return SelectLoadOperands(MVT::i64, WebAssembly::CONST_I64, Op, Offset, Addr);
+}
+
 /// This pass converts a legalized DAG into a WebAssembly-specific DAG, ready
 /// for instruction scheduling.
 FunctionPass *llvm::createWebAssemblyISelDag(WebAssemblyTargetMachine &TM,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -226,25 +226,9 @@
 defm ATOMIC_LOAD_I32 : AtomicLoad<I32, "i32.atomic.load", 0x10>;
 defm ATOMIC_LOAD_I64 : AtomicLoad<I64, "i64.atomic.load", 0x11>;
 
-// Select loads with no constant offset.
-defm : LoadPatNoOffset<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
-defm : LoadPatNoOffset<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
-
-// Select loads with a constant offset.
-
-// Pattern with address + immediate offset
-defm : LoadPatImmOff<i32, atomic_load_32, regPlusImm, "ATOMIC_LOAD_I32">;
-defm : LoadPatImmOff<i64, atomic_load_64, regPlusImm, "ATOMIC_LOAD_I64">;
-defm : LoadPatImmOff<i32, atomic_load_32, or_is_add, "ATOMIC_LOAD_I32">;
-defm : LoadPatImmOff<i64, atomic_load_64, or_is_add, "ATOMIC_LOAD_I64">;
-
-// Select loads with just a constant offset.
-defm : LoadPatOffsetOnly<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
-defm : LoadPatOffsetOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
-
-defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
-defm : LoadPatGlobalAddrOffOnly<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
-
+// Select loads.
+defm : LoadPat<i32, atomic_load_32, "ATOMIC_LOAD_I32">;
+defm : LoadPat<i64, atomic_load_64, "ATOMIC_LOAD_I64">;
 
 // Extending loads. Note that there are only zero-extending atomic loads, no
 // sign-extending loads.
@@ -283,54 +267,18 @@
 def sext_aload_16_64 :
   PatFrag<(ops node:$addr), (anyext (i32 (atomic_load_16 node:$addr)))>;
 
-// Select zero-extending loads with no constant offset.
-defm : LoadPatNoOffset<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatNoOffset<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatNoOffset<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
+// Select zero-extending loads.
+defm : LoadPat<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPat<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+defm : LoadPat<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
 
-// Select sign-extending loads with no constant offset
-defm : LoadPatNoOffset<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
-defm : LoadPatNoOffset<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
-defm : LoadPatNoOffset<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatNoOffset<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
+// Select sign-extending loads.
+defm : LoadPat<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
+defm : LoadPat<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
+defm : LoadPat<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
+defm : LoadPat<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
 // 32->64 sext load gets selected as i32.atomic.load, i64.extend_i32_s
 
-// Zero-extending loads with constant offset
-defm : LoadPatImmOff<i64, zext_aload_8_64, regPlusImm, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, zext_aload_16_64, regPlusImm, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, zext_aload_32_64, regPlusImm, "ATOMIC_LOAD32_U_I64">;
-defm : LoadPatImmOff<i64, zext_aload_8_64, or_is_add, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, zext_aload_16_64, or_is_add, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, zext_aload_32_64, or_is_add, "ATOMIC_LOAD32_U_I64">;
-
-// Sign-extending loads with constant offset
-defm : LoadPatImmOff<i32, atomic_load_8, regPlusImm, "ATOMIC_LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, atomic_load_16, regPlusImm, "ATOMIC_LOAD16_U_I32">;
-defm : LoadPatImmOff<i32, atomic_load_8, or_is_add, "ATOMIC_LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, atomic_load_16, or_is_add, "ATOMIC_LOAD16_U_I32">;
-defm : LoadPatImmOff<i64, sext_aload_8_64, regPlusImm, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, sext_aload_16_64, regPlusImm, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, sext_aload_8_64, or_is_add, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, sext_aload_16_64, or_is_add, "ATOMIC_LOAD16_U_I64">;
-// No 32->64 patterns, just use i32.atomic.load and i64.extend_s/i64
-
-// Extending loads with just a constant offset
-defm : LoadPatOffsetOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatOffsetOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatOffsetOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
-defm : LoadPatOffsetOnly<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
-defm : LoadPatOffsetOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
-defm : LoadPatOffsetOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatOffsetOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
-
-defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, zext_aload_32_64, "ATOMIC_LOAD32_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_8, "ATOMIC_LOAD8_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i32, atomic_load_16, "ATOMIC_LOAD16_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_8_64, "ATOMIC_LOAD8_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, sext_aload_16_64, "ATOMIC_LOAD16_U_I64">;
-
 
 //===----------------------------------------------------------------------===//
 // Atomic stores
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -66,70 +66,6 @@
 defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a, []>;
 defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b, []>;
 
-// Select loads with no constant offset.
-multiclass LoadPatNoOffset<ValueType ty, SDPatternOperator kind, string inst> {
-  def : Pat<(ty (kind I32:$addr)), (!cast<NI>(inst # "_A32") 0, 0, I32:$addr)>,
-        Requires<[HasAddr32]>;
-  def : Pat<(ty (kind (i64 I64:$addr))), (!cast<NI>(inst # "_A64") 0, 0, I64:$addr)>,
-        Requires<[HasAddr64]>;
-}
-
-defm : LoadPatNoOffset<i32, load, "LOAD_I32">;
-defm : LoadPatNoOffset<i64, load, "LOAD_I64">;
-defm : LoadPatNoOffset<f32, load, "LOAD_F32">;
-defm : LoadPatNoOffset<f64, load, "LOAD_F64">;
-
-// Select loads with a constant offset.
-
-// Pattern with address + immediate offset
-multiclass LoadPatImmOff<ValueType ty, SDPatternOperator kind, PatFrag operand,
-                         string inst> {
-  def : Pat<(ty (kind (operand I32:$addr, imm:$off))),
-            (!cast<NI>(inst # "_A32") 0, imm:$off, I32:$addr)>,
-        Requires<[HasAddr32]>;
-  def : Pat<(ty (kind (operand I64:$addr, imm:$off))),
-            (!cast<NI>(inst # "_A64") 0, imm:$off, I64:$addr)>,
-        Requires<[HasAddr64]>;
-}
-
-defm : LoadPatImmOff<i32, load, regPlusImm, "LOAD_I32">;
-defm : LoadPatImmOff<i64, load, regPlusImm, "LOAD_I64">;
-defm : LoadPatImmOff<f32, load, regPlusImm, "LOAD_F32">;
-defm : LoadPatImmOff<f64, load, regPlusImm, "LOAD_F64">;
-defm : LoadPatImmOff<i32, load, or_is_add, "LOAD_I32">;
-defm : LoadPatImmOff<i64, load, or_is_add, "LOAD_I64">;
-defm : LoadPatImmOff<f32, load, or_is_add, "LOAD_F32">;
-defm : LoadPatImmOff<f64, load, or_is_add, "LOAD_F64">;
-
-// Select loads with just a constant offset.
-multiclass LoadPatOffsetOnly<ValueType ty, SDPatternOperator kind, string inst> {
-  def : Pat<(ty (kind imm:$off)),
-            (!cast<NI>(inst # "_A32") 0, imm:$off, (CONST_I32 0))>,
-        Requires<[HasAddr32]>;
-  def : Pat<(ty (kind imm:$off)),
-            (!cast<NI>(inst # "_A64") 0, imm:$off, (CONST_I64 0))>,
-        Requires<[HasAddr64]>;
-}
-
-defm : LoadPatOffsetOnly<i32, load, "LOAD_I32">;
-defm : LoadPatOffsetOnly<i64, load, "LOAD_I64">;
-defm : LoadPatOffsetOnly<f32, load, "LOAD_F32">;
-defm : LoadPatOffsetOnly<f64, load, "LOAD_F64">;
-
-multiclass LoadPatGlobalAddrOffOnly<ValueType ty, SDPatternOperator kind, string inst> {
-  def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off))),
-            (!cast<NI>(inst # "_A32") 0, tglobaladdr:$off, (CONST_I32 0))>,
-        Requires<[IsNotPIC, HasAddr32]>;
-  def : Pat<(ty (kind (WebAssemblyWrapper tglobaladdr:$off))),
-            (!cast<NI>(inst # "_A64") 0, tglobaladdr:$off, (CONST_I64 0))>,
-        Requires<[IsNotPIC, HasAddr64]>;
-}
-
-defm : LoadPatGlobalAddrOffOnly<i32, load, "LOAD_I32">;
-defm : LoadPatGlobalAddrOffOnly<i64, load, "LOAD_I64">;
-defm : LoadPatGlobalAddrOffOnly<f32, load, "LOAD_F32">;
-defm : LoadPatGlobalAddrOffOnly<f64, load, "LOAD_F64">;
-
 // Extending load.
 defm LOAD8_S_I32 : WebAssemblyLoad<I32, "i32.load8_s", 0x2c, []>;
 defm LOAD8_U_I32 : WebAssemblyLoad<I32, "i32.load8_u", 0x2d, []>;
@@ -142,98 +78,57 @@
 defm LOAD32_S_I64 : WebAssemblyLoad<I64, "i64.load32_s", 0x34, []>;
 defm LOAD32_U_I64 : WebAssemblyLoad<I64, "i64.load32_u", 0x35, []>;
 
-// Select extending loads with no constant offset.
-defm : LoadPatNoOffset<i32, sextloadi8, "LOAD8_S_I32">;
-defm : LoadPatNoOffset<i32, zextloadi8, "LOAD8_U_I32">;
-defm : LoadPatNoOffset<i32, sextloadi16, "LOAD16_S_I32">;
-defm : LoadPatNoOffset<i32, zextloadi16, "LOAD16_U_I32">;
-defm : LoadPatNoOffset<i64, sextloadi8, "LOAD8_S_I64">;
-defm : LoadPatNoOffset<i64, zextloadi8, "LOAD8_U_I64">;
-defm : LoadPatNoOffset<i64, sextloadi16, "LOAD16_S_I64">;
-defm : LoadPatNoOffset<i64, zextloadi16, "LOAD16_U_I64">;
-defm : LoadPatNoOffset<i64, sextloadi32, "LOAD32_S_I64">;
-defm : LoadPatNoOffset<i64, zextloadi32, "LOAD32_U_I64">;
-
-// Select extending loads with a constant offset.
-defm : LoadPatImmOff<i32, sextloadi8, regPlusImm, "LOAD8_S_I32">;
-defm : LoadPatImmOff<i32, zextloadi8, regPlusImm, "LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, sextloadi16, regPlusImm, "LOAD16_S_I32">;
-defm : LoadPatImmOff<i32, zextloadi16, regPlusImm, "LOAD16_U_I32">;
-defm : LoadPatImmOff<i64, sextloadi8, regPlusImm, "LOAD8_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi8, regPlusImm, "LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, sextloadi16, regPlusImm, "LOAD16_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi16, regPlusImm, "LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, sextloadi32, regPlusImm, "LOAD32_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi32, regPlusImm, "LOAD32_U_I64">;
-
-defm : LoadPatImmOff<i32, sextloadi8, or_is_add, "LOAD8_S_I32">;
-defm : LoadPatImmOff<i32, zextloadi8, or_is_add, "LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, sextloadi16, or_is_add, "LOAD16_S_I32">;
-defm : LoadPatImmOff<i32, zextloadi16, or_is_add, "LOAD16_U_I32">;
-defm : LoadPatImmOff<i64, sextloadi8, or_is_add, "LOAD8_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi8, or_is_add, "LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, sextloadi16, or_is_add, "LOAD16_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi16, or_is_add, "LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, sextloadi32, or_is_add, "LOAD32_S_I64">;
-defm : LoadPatImmOff<i64, zextloadi32, or_is_add, "LOAD32_U_I64">;
-
-// Select extending loads with just a constant offset.
-defm : LoadPatOffsetOnly<i32, sextloadi8, "LOAD8_S_I32">;
-defm : LoadPatOffsetOnly<i32, zextloadi8, "LOAD8_U_I32">;
-defm : LoadPatOffsetOnly<i32, sextloadi16, "LOAD16_S_I32">;
-defm : LoadPatOffsetOnly<i32, zextloadi16, "LOAD16_U_I32">;
-
-defm : LoadPatOffsetOnly<i64, sextloadi8, "LOAD8_S_I64">;
-defm : LoadPatOffsetOnly<i64, zextloadi8, "LOAD8_U_I64">;
-defm : LoadPatOffsetOnly<i64, sextloadi16, "LOAD16_S_I64">;
-defm : LoadPatOffsetOnly<i64, zextloadi16, "LOAD16_U_I64">;
-defm : LoadPatOffsetOnly<i64, sextloadi32, "LOAD32_S_I64">;
-defm : LoadPatOffsetOnly<i64, zextloadi32, "LOAD32_U_I64">;
-
-defm : LoadPatGlobalAddrOffOnly<i32, sextloadi8, "LOAD8_S_I32">;
-defm : LoadPatGlobalAddrOffOnly<i32, zextloadi8, "LOAD8_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i32, sextloadi16, "LOAD16_S_I32">;
-defm : LoadPatGlobalAddrOffOnly<i32, zextloadi16, "LOAD16_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i64, sextloadi8, "LOAD8_S_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, zextloadi8, "LOAD8_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, sextloadi16, "LOAD16_S_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, zextloadi16, "LOAD16_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, sextloadi32, "LOAD32_S_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, zextloadi32, "LOAD32_U_I64">;
-
-// Resolve "don't care" extending loads to zero-extending loads. This is
-// somewhat arbitrary, but zero-extending is conceptually simpler.
-
-// Select "don't care" extending loads with no constant offset.
-defm : LoadPatNoOffset<i32, extloadi8, "LOAD8_U_I32">;
-defm : LoadPatNoOffset<i32, extloadi16, "LOAD16_U_I32">;
-defm : LoadPatNoOffset<i64, extloadi8, "LOAD8_U_I64">;
-defm : LoadPatNoOffset<i64, extloadi16, "LOAD16_U_I64">;
-defm : LoadPatNoOffset<i64, extloadi32, "LOAD32_U_I64">;
-
-// Select "don't care" extending loads with a constant offset.
-defm : LoadPatImmOff<i32, extloadi8, regPlusImm, "LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, extloadi16, regPlusImm, "LOAD16_U_I32">;
-defm : LoadPatImmOff<i64, extloadi8, regPlusImm, "LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, extloadi16, regPlusImm, "LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, extloadi32, regPlusImm, "LOAD32_U_I64">;
-defm : LoadPatImmOff<i32, extloadi8, or_is_add, "LOAD8_U_I32">;
-defm : LoadPatImmOff<i32, extloadi16, or_is_add, "LOAD16_U_I32">;
-defm : LoadPatImmOff<i64, extloadi8, or_is_add, "LOAD8_U_I64">;
-defm : LoadPatImmOff<i64, extloadi16, or_is_add, "LOAD16_U_I64">;
-defm : LoadPatImmOff<i64, extloadi32, or_is_add, "LOAD32_U_I64">;
-
-// Select "don't care" extending loads with just a constant offset.
-defm : LoadPatOffsetOnly<i32, extloadi8, "LOAD8_U_I32">;
-defm : LoadPatOffsetOnly<i32, extloadi16, "LOAD16_U_I32">;
-defm : LoadPatOffsetOnly<i64, extloadi8, "LOAD8_U_I64">;
-defm : LoadPatOffsetOnly<i64, extloadi16, "LOAD16_U_I64">;
-defm : LoadPatOffsetOnly<i64, extloadi32, "LOAD32_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i32, extloadi8, "LOAD8_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i32, extloadi16, "LOAD16_U_I32">;
-defm : LoadPatGlobalAddrOffOnly<i64, extloadi8, "LOAD8_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, extloadi16, "LOAD16_U_I64">;
-defm : LoadPatGlobalAddrOffOnly<i64, extloadi32, "LOAD32_U_I64">;
+// Pattern matching
+
+// Patterns that match the static (offset) and dynamic (address stack
+// operand) operands of loads, built from combinations of target global
+// addresses (tga below) and constants.
+// For example:
+// (add tga x)   -> load tga(x)
+// tga           -> load tga(0)
+// (add const x) -> load const(x)
+// const         -> load const(0)
+// x             -> load 0(x)
+def LoadOps32 : ComplexPattern<i32, 2, "SelectLoadOperands32">;
+def LoadOps64 : ComplexPattern<i64, 2, "SelectLoadOperands64">;
+
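+// In each output pattern below, the leading 0 is the p2align (alignment
+// hint) operand; $offset and $dynamic are the static offset and dynamic
+// address produced by the ComplexPattern selectors above.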
+multiclass LoadPat<ValueType ty, SDPatternOperator kind, string Name> {
+  def : Pat<(ty (kind (LoadOps32 I32:$offset, I32:$dynamic))),
+            (!cast<NI>(Name # "_A32") 0, I32:$offset, I32:$dynamic)>,
+        Requires<[HasAddr32]>;
+
+  def : Pat<(ty (kind (LoadOps64 I64:$offset, I64:$dynamic))),
+            (!cast<NI>(Name # "_A64") 0, I64:$offset, I64:$dynamic)>,
+        Requires<[HasAddr64]>;
+}
+
+defm : LoadPat<i32, load, "LOAD_I32">;
+defm : LoadPat<i64, load, "LOAD_I64">;
+defm : LoadPat<f32, load, "LOAD_F32">;
+defm : LoadPat<f64, load, "LOAD_F64">;
+
+defm : LoadPat<i32, sextloadi8, "LOAD8_S_I32">;
+defm : LoadPat<i32, sextloadi16, "LOAD16_S_I32">;
+defm : LoadPat<i64, sextloadi8, "LOAD8_S_I64">;
+defm : LoadPat<i64, sextloadi16, "LOAD16_S_I64">;
+defm : LoadPat<i64, sextloadi32, "LOAD32_S_I64">;
+
+defm : LoadPat<i32, zextloadi8, "LOAD8_U_I32">;
+defm : LoadPat<i32, zextloadi16, "LOAD16_U_I32">;
+defm : LoadPat<i64, zextloadi8, "LOAD8_U_I64">;
+defm : LoadPat<i64, zextloadi16, "LOAD16_U_I64">;
+defm : LoadPat<i64, zextloadi32, "LOAD32_U_I64">;
+
+defm : LoadPat<i32, extloadi8, "LOAD8_U_I32">;
+defm : LoadPat<i32, extloadi16, "LOAD16_U_I32">;
+defm : LoadPat<i64, extloadi8, "LOAD8_U_I64">;
+defm : LoadPat<i64, extloadi16, "LOAD16_U_I64">;
+defm : LoadPat<i64, extloadi32, "LOAD32_U_I64">;
 
 // Defines atomic and non-atomic stores, regular and truncating
 multiclass WebAssemblyStore<WebAssemblyRegClass rc, string Name, int Opcode,
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -167,11 +167,7 @@
 
 // Def load patterns from WebAssemblyInstrMemory.td for vector types
 foreach vec = AllVecs in {
-defm : LoadPatNoOffset<vec.vt, load, "LOAD_V128">;
-defm : LoadPatImmOff<vec.vt, load, regPlusImm, "LOAD_V128">;
-defm : LoadPatImmOff<vec.vt, load, or_is_add, "LOAD_V128">;
-defm : LoadPatOffsetOnly<vec.vt, load, "LOAD_V128">;
-defm : LoadPatGlobalAddrOffOnly<vec.vt, load, "LOAD_V128">;
+defm : LoadPat<vec.vt, load, "LOAD_V128">;
 }
 
 // v128.loadX_splat
@@ -206,11 +202,7 @@
 
 foreach vec = AllVecs in {
 defvar inst = "LOAD"#vec.lane_bits#"_SPLAT";
-defm : LoadPatNoOffset<vec.vt, load_splat, inst>;
-defm : LoadPatImmOff<vec.vt, load_splat, regPlusImm, inst>;
-defm : LoadPatImmOff<vec.vt, load_splat, or_is_add, inst>;
-defm : LoadPatOffsetOnly<vec.vt, load_splat, inst>;
-defm : LoadPatGlobalAddrOffOnly<vec.vt, load_splat, inst>;
+defm : LoadPat<vec.vt, load_splat, inst>;
 }
 
 // Load and extend
@@ -255,11 +247,7 @@
                 ["extloadvi", "_U"]] in {
 defvar loadpat = !cast<PatFrag>(exts[0]#vec.split.lane_bits);
 defvar inst = "LOAD_EXTEND"#exts[1]#"_"#vec;
-defm : LoadPatNoOffset<vec.vt, loadpat, inst>;
-defm : LoadPatImmOff<vec.vt, loadpat, regPlusImm, inst>;
-defm : LoadPatImmOff<vec.vt, loadpat, or_is_add, inst>;
-defm : LoadPatOffsetOnly<vec.vt, loadpat, inst>;
-defm : LoadPatGlobalAddrOffOnly<vec.vt, loadpat, inst>;
+defm : LoadPat<vec.vt, loadpat, inst>;
 }
 
 // Load lane into zero vector
@@ -289,11 +277,7 @@
 foreach vec = [I32x4, I64x2] in {
   defvar inst = "LOAD_ZERO_"#vec;
   defvar pat = PatFrag<(ops node:$addr), (scalar_to_vector (vec.lane_vt (load $addr)))>;
-  defm : LoadPatNoOffset<vec.vt, pat, inst>;
-  defm : LoadPatImmOff<vec.vt, pat, regPlusImm, inst>;
-  defm : LoadPatImmOff<vec.vt, pat, or_is_add, inst>;
-  defm : LoadPatOffsetOnly<vec.vt, pat, inst>;
-  defm : LoadPatGlobalAddrOffOnly<vec.vt, pat, inst>;
+  defm : LoadPat<vec.vt, pat, inst>;
 }
 
 // TODO: f32x4 and f64x2 as well
@@ -301,11 +285,7 @@
   defvar inst = "LOAD_ZERO_"#vec;
   defvar pat = PatFrag<(ops node:$ptr),
     (vector_insert (vec.splat (vec.lane_vt 0)), (vec.lane_vt (load $ptr)), 0)>;
-  defm : LoadPatNoOffset<vec.vt, pat, inst>;
-  defm : LoadPatImmOff<vec.vt, pat, regPlusImm, inst>;
-  defm : LoadPatImmOff<vec.vt, pat, or_is_add, inst>;
-  defm : LoadPatOffsetOnly<vec.vt, pat, inst>;
-  defm : LoadPatGlobalAddrOffOnly<vec.vt, pat, inst>;
+  defm : LoadPat<vec.vt, pat, inst>;
 }
 
 // Load lane
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -75,14 +75,19 @@
   if (AddrOperandNum == FIOperandNum) {
     unsigned OffsetOperandNum = WebAssembly::getNamedOperandIdx(
         MI.getOpcode(), WebAssembly::OpName::off);
-    assert(FrameOffset >= 0 && MI.getOperand(OffsetOperandNum).getImm() >= 0);
-    int64_t Offset = MI.getOperand(OffsetOperandNum).getImm() + FrameOffset;
-
-    if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) {
-      MI.getOperand(OffsetOperandNum).setImm(Offset);
-      MI.getOperand(FIOperandNum)
-          .ChangeToRegister(FrameRegister, /*isDef=*/false);
-      return false;
+    auto &OffsetOp = MI.getOperand(OffsetOperandNum);
+    // The offset can only be folded in if it is an immediate; it may instead
+    // be a global address, which is resolved later.
+    if (OffsetOp.isImm()) {
+      assert(FrameOffset >= 0 && OffsetOp.getImm() >= 0);
+      int64_t Offset = OffsetOp.getImm() + FrameOffset;
+
+      if (static_cast<uint64_t>(Offset) <=
+          std::numeric_limits<uint32_t>::max()) {
+        OffsetOp.setImm(Offset);
+        MI.getOperand(FIOperandNum)
+            .ChangeToRegister(FrameRegister, /*isDef=*/false);
+        return false;
+      }
     }
   }
 
diff --git a/llvm/test/CodeGen/WebAssembly/offset.ll b/llvm/test/CodeGen/WebAssembly/offset.ll
--- a/llvm/test/CodeGen/WebAssembly/offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/offset.ll
@@ -666,3 +666,29 @@
 define {i64,i32,i16,i8} @aggregate_return_without_merge() {
   ret {i64,i32,i16,i8} zeroinitializer
 }
+
+;===----------------------------------------------------------------------------
+; Global address loads
+;===----------------------------------------------------------------------------
+
+@global_i32 = external global i32
+@global_i8 = external global i8
+
+; CHECK-LABEL: load_i32_global_address_with_folded_offset:
+; CHECK: i32.const $push0=, 2
+; CHECK: i32.shl $push1=, $0, $pop0
+; CHECK: i32.load $push2=, global_i32($pop1)
+define i32 @load_i32_global_address_with_folded_offset(i32 %n) {
+  %s = getelementptr inbounds i32, i32* @global_i32, i32 %n
+  %t = load i32, i32* %s
+  ret i32 %t
+}
+
+; CHECK-LABEL: load_i8_i32s_global_address_with_folded_offset:
+; CHECK: i32.load8_s $push0=, global_i8($0)
+define i32 @load_i8_i32s_global_address_with_folded_offset(i32 %n) {
+  %s = getelementptr inbounds i8, i8* @global_i8, i32 %n
+  %t = load i8, i8* %s
+  %u = sext i8 %t to i32
+  ret i32 %u
+}
diff --git a/llvm/test/CodeGen/WebAssembly/userstack.ll b/llvm/test/CodeGen/WebAssembly/userstack.ll
--- a/llvm/test/CodeGen/WebAssembly/userstack.ll
+++ b/llvm/test/CodeGen/WebAssembly/userstack.ll
@@ -334,7 +334,7 @@
 ; only possible when that operand is an immediate. In this example it is a
 ; global address, so we should not fold it.
 ; CHECK-LABEL: frame_offset_with_global_address
-; CHECK: i[[PTR]].const ${{.*}}=, str
+; CHECK: i32.load8_u ${{.*}}=, str
 @str = local_unnamed_addr global [3 x i8] c"abc", align 16
 define i8 @frame_offset_with_global_address() {
   %1 = alloca i8, align 4