Index: include/llvm/CodeGen/MachineOperand.h
===================================================================
--- include/llvm/CodeGen/MachineOperand.h
+++ include/llvm/CodeGen/MachineOperand.h
@@ -593,6 +593,9 @@
   /// ChangeToMCSymbol - Replace this operand with a new MC symbol operand.
   void ChangeToMCSymbol(MCSymbol *Sym);
 
+  /// Replace this operand, which must not be tied, with frame index \p Idx.
+  void ChangeToFrameIndex(int Idx);
+
   /// ChangeToRegister - Replace this operand with a new register operand of
   /// the specified value.  If an operand is known to be an register already,
   /// the setReg method should be used.
Index: lib/CodeGen/MachineInstr.cpp
===================================================================
--- lib/CodeGen/MachineInstr.cpp
+++ lib/CodeGen/MachineInstr.cpp
@@ -175,6 +175,16 @@
   Contents.Sym = Sym;
 }
 
+void MachineOperand::ChangeToFrameIndex(int Idx) {
+  // An operand that is both a register and tied may not change kind.
+  assert(!(isReg() && isTied()) &&
+         "Cannot change a tied operand into a FrameIndex");
+  // Detach from register uses before the kind is overwritten below.
+  removeRegFromUses();
+  OpKind = MO_FrameIndex;
+  setIndex(Idx);
+}
+
 /// ChangeToRegister - Replace this operand with a new register operand of
 /// the specified value.  If an operand is known to be an register already,
 /// the setReg method should be used.
Index: lib/Target/AMDGPU/SIFoldOperands.cpp
===================================================================
--- lib/Target/AMDGPU/SIFoldOperands.cpp
+++ lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -48,24 +48,36 @@
 
 struct FoldCandidate {
   MachineInstr *UseMI;
-  unsigned UseOpNo;
-  MachineOperand *OpToFold;
-  uint64_t ImmToFold;
+  union { // Exactly one member is meaningful; Kind says which.
+    MachineOperand *OpToFold; // Valid when isReg().
+    uint64_t ImmToFold;       // Valid when isImm().
+    int FrameIndexToFold;     // Valid when isFI().
+  };
+  unsigned UseOpNo; // Full width: an operand index above 255 must not wrap.
+  MachineOperand::MachineOperandType Kind; // FoldOp->getType() at creation.
 
   FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
-                UseMI(MI), UseOpNo(OpNo) {
-
+    UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
     if (FoldOp->isImm()) {
-      OpToFold = nullptr;
       ImmToFold = FoldOp->getImm();
+    } else if (FoldOp->isFI()) { // Frame index: keep just the index value.
+      FrameIndexToFold = FoldOp->getIndex();
     } else {
       assert(FoldOp->isReg());
       OpToFold = FoldOp;
     }
   }
 
+  bool isFI() const { // True when FrameIndexToFold is the live member.
+    return Kind == MachineOperand::MO_FrameIndex;
+  }
+
   bool isImm() const {
-    return !OpToFold;
+    return Kind == MachineOperand::MO_Immediate; // ImmToFold is live.
+  }
+
+  bool isReg() const { // True when OpToFold is the live member.
+    return Kind == MachineOperand::MO_Register;
   }
 };
 
@@ -107,6 +119,11 @@
     return true;
   }
 
+  if (Fold.isFI()) {
+    Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+    return true;
+  }
+
   MachineOperand *New = Fold.OpToFold;
   if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
       TargetRegisterInfo::isVirtualRegister(New->getReg())) {
@@ -328,7 +345,7 @@
 
       unsigned OpSize = TII->getOpSize(MI, 1);
       MachineOperand &OpToFold = MI.getOperand(1);
-      bool FoldingImm = OpToFold.isImm();
+      bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
 
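-      // FIXME: We could also be folding things like FrameIndexes and
-      // TargetIndexes.
+      // Frame indexes are handled like immediates here.
+      // FIXME: We could also be folding things like TargetIndexes.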
@@ -380,7 +397,7 @@
       for (FoldCandidate &Fold : FoldList) {
         if (updateOperand(Fold, TRI)) {
           // Clear kill flags.
-          if (!Fold.isImm()) {
+          if (Fold.isReg()) {
             assert(Fold.OpToFold && Fold.OpToFold->isReg());
             // FIXME: Probably shouldn't bother trying to fold if not an
             // SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
Index: test/CodeGen/AMDGPU/local-stack-slot-bug.ll
===================================================================
--- test/CodeGen/AMDGPU/local-stack-slot-bug.ll
+++ test/CodeGen/AMDGPU/local-stack-slot-bug.ll
@@ -7,10 +7,8 @@
 ;
 ; CHECK-LABEL: {{^}}main:
 ; CHECK: v_lshlrev_b32_e32 [[BYTES:v[0-9]+]], 2, v0
-; CHECK: v_mov_b32_e32 [[HI_CONST:v[0-9]+]], 0x200
-; CHECK: v_mov_b32_e32 [[LO_CONST:v[0-9]+]], 0
-; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, [[BYTES]], [[HI_CONST]]
-; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, [[BYTES]], [[LO_CONST]]
+; CHECK: v_add_i32_e32 [[HI_OFF:v[0-9]+]], vcc, 0x200, [[BYTES]]
+; CHECK: v_add_i32_e32 [[LO_OFF:v[0-9]+]], vcc, 0, [[BYTES]]
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[LO_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 ; CHECK: buffer_load_dword {{v[0-9]+}}, [[HI_OFF]], {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen
 define amdgpu_ps float @main(i32 %idx) {