diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19375,26 +19375,106 @@
   return true;
 }
 
-/// Walk up chain skipping non-aliasing memory nodes,
-/// looking for aliasing nodes and adding them to the Aliases vector.
+static bool hasGluedInputChain(SDNode *N) {
+  // Chain is operand 0, glue the last operand; both must share a producer.
+  const unsigned NumOps = N->getNumOperands(); // 0 would wrap NumOps - 1.
+  return NumOps != 0 && N->getOperand(0).getValueType() == MVT::Other &&
+         N->getOperand(NumOps - 1).getValueType() == MVT::Glue &&
+         N->getOperand(0).getNode() == N->getOperand(NumOps - 1).getNode();
+}
+
+/// True if N's last two results are a chain followed by glue.
+static bool hasGluedOutput(SDNode *N) {
+  const unsigned NumVals = N->getNumValues();
+  if (NumVals < 2) // A chain/glue pair needs at least two results.
+    return false;
+  return N->getValueType(NumVals - 2) == MVT::Other &&
+         N->getValueType(NumVals - 1) == MVT::Glue;
+}
+
 void DAGCombiner::GatherAllAliases(LSBaseSDNode *N, SDValue OriginalChain,
                                    SmallVectorImpl<SDValue> &Aliases) {
   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
 
   // Get alias information for node.
-  bool IsLoad = isa<LoadSDNode>(N) && !N->isVolatile();
+  const bool IsLoad = isa<LoadSDNode>(N) && !N->isVolatile();
 
   // Starting off.
   Chains.push_back(OriginalChain);
   unsigned Depth = 0;
 
+  // Attempt to improve chain C by a single step. Returns true and updates C
+  // (null once EntryToken is reached), or false if it cannot be improved.
+  std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
+    switch (C.getOpcode()) {
+    case ISD::EntryToken:
+      // No need to mark EntryToken.
+      C = SDValue();
+      return true;
+    case ISD::LOAD:
+    case ISD::STORE: {
+      // Get alias information for C.
+      auto *LSChain = cast<LSBaseSDNode>(C.getNode());
+      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) && !LSChain->isVolatile();
+      if ((IsLoad && IsOpLoad) || !isAlias(N, LSChain)) {
+        // Look further up the chain.
+        C = C.getOperand(0);
+        return true;
+      }
+      // Alias, so stop here.
+      return false;
+    }
+
+    case ISD::CopyFromReg: {
+      // A glued copy is only skippable along with the rest of its glued
+      // chain, so keep improving from there; an unglued copy always is.
+      const bool IsGlued = hasGluedInputChain(C.getNode());
+      C = C.getOperand(0);
+      return !IsGlued || ImproveChain(C);
+    }
+
+    case ISD::CopyToReg: {
+      const bool GluedIn = hasGluedInputChain(C.getNode());
+      // A copy touches no memory, so step past it when it is glued to its
+      // chain or has a glue result. Returning true without advancing C would
+      // let the caller's Visited check drop the chain above unexamined.
+      if (!GluedIn && !hasGluedOutput(C.getNode()))
+        return false;
+      C = C.getOperand(0);
+      return !GluedIn || ImproveChain(C);
+    }
+
+    case ISD::INLINEASM: {
+      auto *ExtraInfo =
+          cast<ConstantSDNode>(C->getOperand(InlineAsm::Op_ExtraInfo));
+      unsigned EIInt = ExtraInfo->getZExtValue();
+      // A load may pass asm that does not store; a store may only pass asm
+      // that neither stores nor loads.
+      bool IsSafe = !(EIInt & InlineAsm::Extra_MayStore);
+      if (!IsLoad)
+        IsSafe = IsSafe && !(EIInt & InlineAsm::Extra_MayLoad);
+      if (IsSafe && hasGluedInputChain(C.getNode())) {
+        C = C.getOperand(0);
+        return ImproveChain(C);
+      }
+      return false;
+    }
+    default:
+      return false;
+    }
+  };
+
   // Look at each chain and determine if it is an alias.  If so, add it to the
   // aliases list.  If not, then continue up the chain looking for the next
   // candidate.
   while (!Chains.empty()) {
     SDValue Chain = Chains.pop_back_val();
 
+    // Don't bother if we've seen Chain before.
+    if (!Visited.insert(Chain.getNode()).second)
+      continue;
+
     // For TokenFactor nodes, look at each operand and only continue up the
     // chain until we reach the depth limit.
     //
@@ -19407,58 +19487,30 @@
       return;
     }
 
-    // Don't bother if we've been before.
-    if (!Visited.insert(Chain.getNode()).second)
-      continue;
-
-    switch (Chain.getOpcode()) {
-    case ISD::EntryToken:
-      // Entry token is ideal chain operand, but handled in FindBetterChain.
-      break;
-
-    case ISD::LOAD:
-    case ISD::STORE: {
-      // Get alias information for Chain.
-      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
-          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
-
-      // If chain is alias then stop here.
-      if (!(IsLoad && IsOpLoad) &&
-          isAlias(N, cast<LSBaseSDNode>(Chain.getNode()))) {
-        Aliases.push_back(Chain);
-      } else {
-        // Look further up the chain.
-        Chains.push_back(Chain.getOperand(0));
-        ++Depth;
-      }
-      break;
-    }
-
-    case ISD::TokenFactor:
+    if (Chain.getOpcode() == ISD::TokenFactor) {
       // We have to check each of the operands of the token factor for "small"
       // token factors, so we queue them up.  Adding the operands to the queue
       // (stack) in reverse order maintains the original order and increases the
       // likelihood that getNode will find a matching token factor (CSE.)
       if (Chain.getNumOperands() > 16) {
         Aliases.push_back(Chain);
-        break;
+        continue;
       }
       for (unsigned n = Chain.getNumOperands(); n;)
         Chains.push_back(Chain.getOperand(--n));
       ++Depth;
-      break;
-
-    case ISD::CopyFromReg:
-      // Forward past CopyFromReg.
-      Chains.push_back(Chain.getOperand(0));
+      continue;
+    }
+    // Everything else
+    if (ImproveChain(Chain)) {
+      // Updated Chain Found, Consider new chain if one exists.
+      if (Chain.getNode())
+        Chains.push_back(Chain);
       ++Depth;
-      break;
-
-    default:
-      // For all other instructions we will just have to take what we can get.
-      Aliases.push_back(Chain);
-      break;
+      continue;
     }
+    // No Improved Chain Possible, treat as Alias.
+    Aliases.push_back(Chain);
   }
 }
 
diff --git a/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll b/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll
--- a/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll
+++ b/llvm/test/CodeGen/Mips/cconv/arguments-varargs.ll
@@ -122,12 +122,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i16
   %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
   store volatile i16 %arg1, i16* %e1, align 2
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i16
   %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
   store volatile i16 %arg2, i16* %e2, align 2
@@ -237,12 +237,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i32
   %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
   store volatile i32 %arg1, i32* %e1, align 4
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i32
   %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
   store volatile i32 %arg2, i32* %e2, align 4
@@ -359,12 +359,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i64
   %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
   store volatile i64 %arg1, i64* %e1, align 8
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i64
   %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
   store volatile i64 %arg2, i64* %e2, align 8
@@ -474,12 +474,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i16
   %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
   store volatile i16 %arg1, i16* %e1, align 2
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i16
   %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
   store volatile i16 %arg2, i16* %e2, align 2
@@ -589,12 +589,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i32
   %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
   store volatile i32 %arg1, i32* %e1, align 4
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i32
   %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
   store volatile i32 %arg2, i32* %e2, align 4
@@ -711,12 +711,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i64
   %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
   store volatile i64 %arg1, i64* %e1, align 8
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i64
   %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
   store volatile i64 %arg2, i64* %e2, align 8
@@ -825,12 +825,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i16
   %e1 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 1
   store volatile i16 %arg1, i16* %e1, align 2
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i16
   %e2 = getelementptr [3 x i16], [3 x i16]* @hwords, i32 0, i32 2
   store volatile i16 %arg2, i16* %e2, align 2
@@ -939,12 +939,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i32
   %e1 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 1
   store volatile i32 %arg1, i32* %e1, align 4
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i32
   %e2 = getelementptr [3 x i32], [3 x i32]* @words, i32 0, i32 2
   store volatile i32 %arg2, i32* %e2, align 4
@@ -1060,12 +1060,12 @@
   %ap2 = bitcast i8** %ap to i8*
   call void @llvm.va_start(i8* %ap2)
 
-  call void asm sideeffect "teqi $$zero, 1", ""()
+  call void asm sideeffect "teqi $$zero, 1", "~{memory}"()
   %arg1 = va_arg i8** %ap, i64
   %e1 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 1
   store volatile i64 %arg1, i64* %e1, align 8
 
-  call void asm sideeffect "teqi $$zero, 2", ""()
+  call void asm sideeffect "teqi $$zero, 2", "~{memory}"()
   %arg2 = va_arg i8** %ap, i64
   %e2 = getelementptr [3 x i64], [3 x i64]* @dwords, i32 0, i32 2
   store volatile i64 %arg2, i64* %e2, align 8
diff --git a/llvm/test/CodeGen/Thumb/frame-access.ll b/llvm/test/CodeGen/Thumb/frame-access.ll
--- a/llvm/test/CodeGen/Thumb/frame-access.ll
+++ b/llvm/test/CodeGen/Thumb/frame-access.ll
@@ -280,9 +280,9 @@
 ; CHECK-NEXT:  add r2, sp, #12
 ; CHECK-NEXT:  bl  h
 ; Load `x`, `y`, and `z` via SP
-; CHECK:       ldr r1, [sp, #20]
-; CHECK-NEXT:  ldr r2, [sp, #16]
-; CHECK-NEXT:  ldr r3, [sp, #12]
+; CHECK-DAG:  ldr r1, [sp, #20]
+; CHECK-DAG:  ldr r2, [sp, #16]
+; CHECK-DAG:  ldr r3, [sp, #12]
 ; CHECK:       bl  g
 
 ; Re-aligned stack, access via SP.
@@ -324,9 +324,9 @@
 ; CHECK-NEXT:  add r2, sp, #20
 ; CHECK-NEXT:  bl  h
 ; Load `x`, `y`, and `z` via SP for passing to `g`
-; CHECK:       ldr r1, [sp, #28]
-; CHECK-NEXT:  ldr r2, [sp, #24]
-; CHECK-NEXT:  ldr r3, [sp, #20]
+; CHECK-DAG:       ldr r1, [sp, #28]
+; CHECK-DAG:  ldr r2, [sp, #24]
+; CHECK-DAG:  ldr r3, [sp, #20]
 ; CHECK:       bl  g
 
 ; VLAs, access via BP.
diff --git a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
--- a/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
+++ b/llvm/test/CodeGen/X86/inline-asm-fpstack.ll
@@ -209,17 +209,12 @@
 ; CHECK:       ## %bb.0: ## %entry
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    fldt (%eax)
-; CHECK-NEXT:    flds LCPI10_0
-; CHECK-NEXT:    fmul %st, %st(1)
-; CHECK-NEXT:    flds LCPI10_1
-; CHECK-NEXT:    fmul %st, %st(2)
-; CHECK-NEXT:    fxch %st(2)
+; CHECK-NEXT:    fmuls LCPI10_0
+; CHECK-NEXT:    fmuls LCPI10_1
+; CHECK-NEXT:    fld %st(0)
 ; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    fistpl %st
 ; CHECK-NEXT:    ## InlineAsm End
-; CHECK-NEXT:    fldt (%eax)
-; CHECK-NEXT:    fmulp %st, %st(1)
-; CHECK-NEXT:    fmulp %st, %st(1)
 ; CHECK-NEXT:    ## InlineAsm Start
 ; CHECK-NEXT:    fistpl %st
 ; CHECK-NEXT:    ## InlineAsm End
diff --git a/llvm/test/CodeGen/X86/pr9517.ll b/llvm/test/CodeGen/X86/pr9517.ll
--- a/llvm/test/CodeGen/X86/pr9517.ll
+++ b/llvm/test/CodeGen/X86/pr9517.ll
@@ -11,7 +11,6 @@
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    nop
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %eax
 ; CHECK-NEXT:    incl %eax
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
@@ -22,7 +21,7 @@
   ret i16 %v
 }
 
-; The asm call prevents the merging the loads here. 
+; The asm call prevents merging the loads here.
 define i16 @unify_through_trival_asm_w_memory_clobber() {
 ; CHECK-LABEL: unify_through_trival_asm_w_memory_clobber:
 ; CHECK:       # %bb.0:
@@ -47,61 +46,37 @@
 ; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
 ; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $2, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $3, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $4, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $5, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $6, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $7, %al
-; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    outb %al, %dx
 ; CHECK-NEXT:    #NO_APP
-; CHECK-NEXT:    movzwl {{.*}}(%rip), %edx
-; CHECK-NEXT:    addl $16, %edx
 ; CHECK-NEXT:    movb $8, %al
 ; CHECK-NEXT:    # kill: def $dx killed $dx killed $edx
 ; CHECK-NEXT:    #APP