Index: include/llvm/IR/DebugInfoMetadata.h
===================================================================
--- include/llvm/IR/DebugInfoMetadata.h
+++ include/llvm/IR/DebugInfoMetadata.h
@@ -2232,6 +2232,9 @@
   expr_op_iterator expr_op_end() const {
     return expr_op_iterator(elements_end());
   }
+  iterator_range<expr_op_iterator> expr_ops() const {
+    return {expr_op_begin(), expr_op_end()};
+  }
   /// @}
 
   bool isValid() const;
@@ -2240,11 +2243,19 @@
     return MD->getMetadataID() == DIExpressionKind;
   }
 
-  /// Is the first element a DW_OP_deref?.
+  /// Return whether the first element a DW_OP_deref.
   bool startsWithDeref() const {
     return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref;
   }
 
+  /// Return whether the last element a DW_OP_xderef.
+  bool endsWithXDeref() const {
+    unsigned Last = 0;
+    for (auto &Op : expr_ops())
+      Last = Op.getOp();
+    return Last == dwarf::DW_OP_xderef;
+  }
+
   /// Holds the characteristics of one fragment of a larger variable.
   struct FragmentInfo {
     uint64_t SizeInBits;
Index: lib/Bitcode/Reader/MetadataLoader.cpp
===================================================================
--- lib/Bitcode/Reader/MetadataLoader.cpp
+++ lib/Bitcode/Reader/MetadataLoader.cpp
@@ -1520,12 +1520,32 @@
       return error("Invalid record");
 
     IsDistinct = Record[0] & 1;
-    bool HasOpFragment = Record[0] & 2;
+    uint64_t Version = Record[0] >> 1;
     auto Elts = MutableArrayRef<uint64_t>(Record).slice(1);
-    if (!HasOpFragment)
-      if (unsigned N = Elts.size())
-        if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece)
-          Elts[N - 3] = dwarf::DW_OP_LLVM_fragment;
+    unsigned N = Elts.size();
+    // Perform various upgrades.
+    switch (Version) {
+    case 0:
+      if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece)
+        Elts[N - 3] = dwarf::DW_OP_LLVM_fragment;
+      LLVM_FALLTHROUGH;
+    case 1:
+      // Move DW_OP_deref to the end.
+      if (N && Elts[0] == dwarf::DW_OP_deref) {
+        unsigned I;
+        for (I = 0; I < N - 1; ++I) {
+          Elts[I] = Elts[I + 1];
+          if (N > 3 && I == N - 4 && Elts[I + 1] == dwarf::DW_OP_LLVM_fragment)
+            break;
+        }
+        Elts[I] = dwarf::DW_OP_deref;
+      }
+    case 2:
+      // Up-to-date!
+      break;
+    default:
+      return error("Invalid record");
+    }
 
     MetadataList.assignValue(
         GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))),
Index: lib/Bitcode/Writer/BitcodeWriter.cpp
===================================================================
--- lib/Bitcode/Writer/BitcodeWriter.cpp
+++ lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1756,9 +1756,8 @@
                                             SmallVectorImpl<uint64_t> &Record,
                                             unsigned Abbrev) {
   Record.reserve(N->getElements().size() + 1);
-
-  const uint64_t HasOpFragmentFlag = 1 << 1;
-  Record.push_back((uint64_t)N->isDistinct() | HasOpFragmentFlag);
+  const uint64_t Version = 2 << 1;
+  Record.push_back((uint64_t)N->isDistinct() | Version);
   Record.append(N->elements_begin(), N->elements_end());
 
   Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev);
Index: lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -170,7 +170,11 @@
       DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
     }
 
+    SmallVector<uint64_t, 8> Ops;
     if (Global) {
+      // The location of a global is an address; the deref tells DwarfExpression
+      // to emit a memory location description.
+      Ops.push_back(dwarf::DW_OP_deref);
       const MCSymbol *Sym = Asm->getSymbol(Global);
       if (Global->isThreadLocal()) {
         if (Asm->TM.Options.EmulatedTLS) {
@@ -204,11 +208,13 @@
         DD->addArangeLabel(SymbolCU(this, Sym));
         addOpAddress(*Loc, Sym);
       }
-    }
-    if (Expr) {
-      DwarfExpr->addFragmentOffset(Expr);
-      DwarfExpr->addExpression(Expr);
-    }
+    } 
+
+    if (Expr)
+      Ops.append(Expr->elements_begin(), Expr->elements_end());
+    DIExpressionCursor Cursor(Ops);
+    DwarfExpr->addFragmentOffset(Expr);
+    DwarfExpr->addExpression(std::move(Cursor));
   }
   if (Loc)
     addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
@@ -547,18 +553,21 @@
   DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
   for (auto &Fragment : DV.getFrameIndexExprs()) {
     unsigned FrameReg = 0;
+    const DIExpression *Expr = Fragment.Expr;
     const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
     int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
-    DwarfExpr.addFragmentOffset(Fragment.Expr);
+    DwarfExpr.addFragmentOffset(Expr);
     SmallVector<uint64_t, 8> Ops;
     Ops.push_back(dwarf::DW_OP_plus);
     Ops.push_back(Offset);
-    Ops.push_back(dwarf::DW_OP_deref);
-    Ops.append(Fragment.Expr->elements_begin(), Fragment.Expr->elements_end());
-    DIExpressionCursor Expr(Ops);
+    // FIXME: This is a hack. We should come up with a less ambiguous encoding.
+    if (!Expr->endsWithXDeref())
+      Ops.push_back(dwarf::DW_OP_deref);
+    Ops.append(Expr->elements_begin(), Expr->elements_end());
+    DIExpressionCursor Cursor(Ops);
     DwarfExpr.addMachineRegExpression(
-        *Asm->MF->getSubtarget().getRegisterInfo(), Expr, FrameReg);
-    DwarfExpr.addExpression(std::move(Expr));
+        *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
+    DwarfExpr.addExpression(std::move(Cursor));
   }
   addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
 
Index: lib/CodeGen/AsmPrinter/DwarfDebug.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1514,6 +1514,7 @@
     SmallVector<uint64_t, 8> Ops;
     // FIXME: Should this condition be Location.isIndirect() instead?
     if (Location.getOffset()) {
+    //if (Location.isIndirect()) {
       Ops.push_back(dwarf::DW_OP_plus);
       Ops.push_back(Location.getOffset());
       Ops.push_back(dwarf::DW_OP_deref);
Index: lib/CodeGen/AsmPrinter/DwarfExpression.h
===================================================================
--- lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -102,6 +102,9 @@
   unsigned SubRegisterSizeInBits = 0;
   unsigned SubRegisterOffsetInBits = 0;
 
+  /// The kind of location description being produced.
+  enum { Unknown = 0, Register, Memory, Implicit } LocationKind = Unknown;
+
   /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
   /// to represent a subregister.
   void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) {
@@ -122,7 +125,8 @@
   /// current function.
   virtual bool isFrameRegister(const TargetRegisterInfo &TRI, unsigned MachineReg) = 0;
 
-  /// Emit a DW_OP_reg operation.
+  /// Emit a DW_OP_reg operation. Note that this is only legal inside a DWARF
+  /// register location description.
   void addReg(int DwarfReg, const char *Comment = nullptr);
   /// Emit a DW_OP_breg operation.
   void addBReg(int DwarfReg, int Offset);
Index: lib/CodeGen/AsmPrinter/DwarfExpression.cpp
===================================================================
--- lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -23,9 +23,12 @@
 using namespace llvm;
 
 void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
-  assert(DwarfReg >= 0 && "invalid negative dwarf register number");
-  if (DwarfReg < 32) {
-    emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
+ assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+ assert((LocationKind == Unknown || LocationKind == Register) &&
+        "location description already locked down");
+ LocationKind = Register;
+ if (DwarfReg < 32) {
+   emitOp(dwarf::DW_OP_reg0 + DwarfReg, Comment);
   } else {
     emitOp(dwarf::DW_OP_regx, Comment);
     emitUnsigned(DwarfReg);
@@ -34,6 +37,7 @@
 
 void DwarfExpression::addBReg(int DwarfReg, int Offset) {
   assert(DwarfReg >= 0 && "invalid negative dwarf register number");
+  assert(LocationKind != Register && "location description already locked down");
   if (DwarfReg < 32) {
     emitOp(dwarf::DW_OP_breg0 + DwarfReg);
   } else {
@@ -156,18 +160,23 @@
 }
 
 void DwarfExpression::addSignedConstant(int64_t Value) {
+  assert(LocationKind == Implicit || LocationKind == Unknown);
+  LocationKind = Implicit;
   emitOp(dwarf::DW_OP_consts);
   emitSigned(Value);
-  addStackValue();
 }
 
 void DwarfExpression::addUnsignedConstant(uint64_t Value) {
+  assert(LocationKind == Implicit || LocationKind == Unknown);
+  LocationKind = Implicit;
   emitOp(dwarf::DW_OP_constu);
   emitUnsigned(Value);
-  addStackValue();
 }
 
 void DwarfExpression::addUnsignedConstant(const APInt &Value) {
+  assert(LocationKind == Implicit || LocationKind == Unknown);
+  LocationKind = Implicit;
+
   unsigned Size = Value.getBitWidth();
   const uint64_t *Data = Value.getRawData();
 
@@ -178,7 +187,8 @@
     addUnsignedConstant(*Data++);
     if (Offset == 0 && Size <= 64)
       break;
-    addOpPiece(std::min(Size-Offset, 64u), Offset);
+    addStackValue();
+    addOpPiece(std::min(Size - Offset, 64u), Offset);
     Offset += 64;
   }
 }
@@ -216,6 +226,9 @@
     return true;
   }
 
+  // FIXME:
+  // Don't emit locations that cannot be expressed without DW_OP_stack_value.
+
   assert(DwarfRegs.size() == 1);
   auto Reg = DwarfRegs[0];
   bool FBReg = isFrameRegister(TRI, MachineReg); 
@@ -223,47 +236,50 @@
 
   // Pattern-match combinations for which more efficient representations exist.
   switch (Op->getOp()) {
-  default: {
-    if (FBReg)
-      addFBReg(0);
-    else
-      addReg(Reg.DwarfRegNo, 0);
-    break;
-  }
   case dwarf::DW_OP_plus:
   case dwarf::DW_OP_minus: {
-    // [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset].
-    // [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
-    auto N = ExprCursor.peekNext();
-    if (N && N->getOp() == dwarf::DW_OP_deref) {
-      int Offset = Op->getArg(0);
-      int SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset;
-      if (FBReg)
-        addFBReg(SignedOffset);
-      else
-        addBReg(Reg.DwarfRegNo, SignedOffset);
+    // [DW_OP_reg,Offset,DW_OP_plus] --> [DW_OP_breg, Offset].
+    // [DW_OP_reg,Offset,DW_OP_minus] --> [DW_OP_breg,-Offset].
+    int Offset = Op->getArg(0);
+    int SignedOffset = (Op->getOp() == dwarf::DW_OP_plus) ? Offset : -Offset;
+    if (FBReg)
+      addFBReg(SignedOffset);
+    else
+      addBReg(Reg.DwarfRegNo, SignedOffset);
 
-      ExprCursor.consume(2);
-      break;
-    }
-    addReg(Reg.DwarfRegNo, 0);
+    ExprCursor.take();
     break;
   }
-  case dwarf::DW_OP_deref:
-    // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
+  default: {
     if (FBReg)
       addFBReg(0);
     else
       addBReg(Reg.DwarfRegNo, 0);
-    ExprCursor.take();
     break;
   }
+  }
   DwarfRegs.clear();
   return true;
 }
 
+/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?".
+static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
+  while (ExprCursor) {
+    auto Op = ExprCursor.take();
+    switch (Op->getOp()) {
+    case dwarf::DW_OP_deref:
+    case dwarf::DW_OP_LLVM_fragment:
+      break;
+    default:
+      return false;
+    }
+  }
+  return true;
+}
+
 void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
                                     unsigned FragmentOffsetInBits) {
+  auto N = ExprCursor.peek();
   while (ExprCursor) {
     auto Op = ExprCursor.take();
 
@@ -291,40 +307,65 @@
       if (SubRegisterSizeInBits)
         SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
       
+      if (LocationKind != Register && LocationKind != Memory)
+        // Turn this into an implicit location description.
+        addStackValue();
+
+      // Emit the DW_OP_piece.
       addOpPiece(SizeInBits, SubRegisterOffsetInBits);
       setSubRegisterPiece(0, 0);
-      break;
+      // Reset the location description kind.
+      LocationKind = Unknown;
+      return;
     }
     case dwarf::DW_OP_plus:
+      assert(LocationKind != Register && LocationKind != Memory);
       emitOp(dwarf::DW_OP_plus_uconst);
       emitUnsigned(Op->getArg(0));
       break;
     case dwarf::DW_OP_minus:
-      // There is no OP_minus_uconst.
+      assert(LocationKind != Register && LocationKind != Memory);
+      // There is no DW_OP_minus_uconst.
       emitOp(dwarf::DW_OP_constu);
       emitUnsigned(Op->getArg(0));
       emitOp(dwarf::DW_OP_minus);
       break;
-    case dwarf::DW_OP_deref:
-      emitOp(dwarf::DW_OP_deref);
+    case dwarf::DW_OP_deref: {
+      assert(LocationKind != Register);
+      if (LocationKind != Memory && isMemoryLocation(ExprCursor))
+        // Turning this into a memory location description makes the deref
+        // implicit.
+        LocationKind = Memory;
+      else
+        emitOp(dwarf::DW_OP_deref);
       break;
+    }
     case dwarf::DW_OP_constu:
+      assert(LocationKind != Register && LocationKind != Memory);
       emitOp(dwarf::DW_OP_constu);
       emitUnsigned(Op->getArg(0));
       break;
     case dwarf::DW_OP_stack_value:
-      addStackValue();
+      assert(LocationKind != Register && LocationKind != Memory);
       break;
     case dwarf::DW_OP_swap:
+      assert(LocationKind != Register && LocationKind != Memory);
       emitOp(dwarf::DW_OP_swap);
       break;
     case dwarf::DW_OP_xderef:
+      assert(LocationKind != Register && LocationKind != Memory);
       emitOp(dwarf::DW_OP_xderef);
       break;
     default:
       llvm_unreachable("unhandled opcode found in expression");
     }
   }
+
+  if (LocationKind == Register || LocationKind == Memory)
+    return;
+
+  // Turn this into an implicit location description.
+  addStackValue();
 }
 
 /// add masking operations to stencil out a subregister.
Index: test/Bitcode/DIExpression-deref.ll
===================================================================
--- /dev/null
+++ test/Bitcode/DIExpression-deref.ll
@@ -0,0 +1,27 @@
+; RUN: llvm-dis -o - %s.bc | FileCheck %s
+
+!llvm.dbg.cu = !{!1}
+!llvm.module.flags = !{!20, !21}
+
+!0 = distinct !DIGlobalVariable(name: "g", scope: !1, file: !2, line: 1, type: !5, isLocal: false, isDefinition: true)
+!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "clang (llvm/trunk 288154)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !4)
+!2 = !DIFile(filename: "a.c", directory: "/")
+!3 = !{}
+!4 = !{!10, !11, !12, !13}
+!5 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+; DW_OP_deref should be moved to the back of the expression.
+;
+; CHECK: !DIExpression(DW_OP_plus, 0, DW_OP_deref, DW_OP_LLVM_fragment, 8, 32)
+!6 = !DIExpression(DW_OP_deref, DW_OP_plus, 0, DW_OP_LLVM_fragment, 8, 32)
+; CHECK: !DIExpression(DW_OP_plus, 0, DW_OP_deref)
+!7 = !DIExpression(DW_OP_deref, DW_OP_plus, 0)
+; CHECK: !DIExpression(DW_OP_plus, 1, DW_OP_deref)
+!8 = !DIExpression(DW_OP_plus, 1, DW_OP_deref)
+; CHECK: !DIExpression(DW_OP_deref)
+!9 = !DIExpression(DW_OP_deref)
+!10 = !DIGlobalVariableExpression(var: !0, expr: !6)
+!11 = !DIGlobalVariableExpression(var: !0, expr: !7)
+!12 = !DIGlobalVariableExpression(var: !0, expr: !8)
+!13 = !DIGlobalVariableExpression(var: !0, expr: !9)
+!20 = !{i32 2, !"Dwarf Version", i32 4}
+!21 = !{i32 2, !"Debug Info Version", i32 3}
Index: test/DebugInfo/X86/block-capture.ll
===================================================================
--- test/DebugInfo/X86/block-capture.ll
+++ test/DebugInfo/X86/block-capture.ll
@@ -123,7 +123,7 @@
 !66 = !DILocation(line: 2, column: 20, scope: !8)
 !67 = !DILocation(line: 2, column: 21, scope: !8)
 !68 = !DILocalVariable(name: "block", line: 2, scope: !8, file: !5, type: !25)
-!69 = !DIExpression(DW_OP_deref, DW_OP_plus, 32)
+!69 = !DIExpression(DW_OP_plus, 32, DW_OP_deref)
 !70 = !DILocation(line: 2, column: 9, scope: !8)
 !71 = !DILocation(line: 2, column: 23, scope: !72)
 !72 = distinct !DILexicalBlock(line: 2, column: 21, file: !1, scope: !8)
Index: test/DebugInfo/X86/debug-info-block-captured-self.ll
===================================================================
--- test/DebugInfo/X86/debug-info-block-captured-self.ll
+++ test/DebugInfo/X86/debug-info-block-captured-self.ll
@@ -107,5 +107,5 @@
 !106 = !DILocation(line: 40, scope: !42)
 !107 = !DIFile(filename: "llvm/tools/clang/test/CodeGenObjC/debug-info-block-captured-self.m", directory: "")
 !108 = !{i32 1, !"Debug Info Version", i32 3}
-!109 = !DIExpression(DW_OP_deref, DW_OP_plus, 32)
-!110 = !DIExpression(DW_OP_deref, DW_OP_plus, 32)
+!109 = !DIExpression(DW_OP_plus, 32, DW_OP_deref)
+!110 = !DIExpression(DW_OP_plus, 32, DW_OP_deref)
Index: test/DebugInfo/X86/debug-info-blocks.ll
===================================================================
--- test/DebugInfo/X86/debug-info-blocks.ll
+++ test/DebugInfo/X86/debug-info-blocks.ll
@@ -380,4 +380,4 @@
 !108 = !DILocation(line: 61, scope: !36)
 !109 = !DILocation(line: 62, scope: !36)
 !110 = !{i32 1, !"Debug Info Version", i32 3}
-!111 = !DIExpression(DW_OP_deref, DW_OP_plus, 32)
+!111 = !DIExpression(DW_OP_plus, 32, DW_OP_deref)
Index: test/DebugInfo/X86/dw_op_minus.ll
===================================================================
--- test/DebugInfo/X86/dw_op_minus.ll
+++ test/DebugInfo/X86/dw_op_minus.ll
@@ -10,7 +10,7 @@
 ;   Capture(buf);
 ; }
 ; }
-; The interesting part is !DIExpression(DW_OP_deref, DW_OP_minus, 400)
+; The interesting part is !DIExpression(DW_OP_minus, 400, DW_OP_deref)
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
@@ -56,20 +56,17 @@
 !14 = !{i32 2, !"Debug Info Version", i32 3}
 !15 = !{!"clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)"}
 !16 = !DILocation(line: 5, column: 3, scope: !4)
-!17 = !DIExpression(DW_OP_deref, DW_OP_minus, 400)
+!17 = !DIExpression(DW_OP_minus, 400, DW_OP_deref)
 !18 = !DILocation(line: 5, column: 7, scope: !4)
 !19 = !DILocation(line: 6, column: 11, scope: !4)
 !20 = !DILocation(line: 6, column: 3, scope: !4)
 !21 = !DILocation(line: 7, column: 1, scope: !4)
 
 ; RCX - 400
-; CHECK:      .short	6                       # Loc expr size
+; CHECK:      .short	3                       # Loc expr size
 ; CHECK-NEXT: .byte	114                     # DW_OP_breg2
-; CHECK-NEXT: .byte	0                       # 0
-; CHECK-NEXT: .byte	16                      # DW_OP_constu
-; CHECK-NEXT: .byte	144                     # 400
-; CHECK-NEXT: .byte	3                       # DW_OP_minus
-; CHECK-NEXT: .byte	28
+; CHECK-NEXT: .byte	240                     # -400
+; CHECK-NEXT: .byte	124
 
 ; RCX is clobbered in call @Capture, but there is a spilled copy.
 ; *(RSP + 8) - 400
Index: test/DebugInfo/X86/dw_op_minus_direct.ll
===================================================================
--- test/DebugInfo/X86/dw_op_minus_direct.ll
+++ test/DebugInfo/X86/dw_op_minus_direct.ll
@@ -8,8 +8,8 @@
 
 ; CHECK: Beginning address offset: 0x0000000000000000
 ; CHECK:    Ending address offset: 0x0000000000000004
-; CHECK:     Location description: 50 10 ff ff ff ff 0f 1a 10 01 1c
-;                                  rax, constu 0xffffffff, and, constu 0x00000001, minus
+; CHECK:     Location description: 70 7f 9f
+;                                  breg0 -1, stack-value
 source_filename = "minus.c"
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.12.0"