Index: lib/Target/Mips/MicroMipsSizeReduction.cpp
===================================================================
--- lib/Target/Mips/MicroMipsSizeReduction.cpp
+++ lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -10,7 +10,6 @@
 /// This pass is used to reduce the size of instructions where applicable.
 ///
 /// TODO: Implement microMIPS64 support.
-/// TODO: Implement support for reducing into lwp/swp instruction.
 //===----------------------------------------------------------------------===//
 #include "Mips.h"
 #include "MipsInstrInfo.h"
@@ -22,8 +21,10 @@
 using namespace llvm;
 
 #define DEBUG_TYPE "micromips-reduce-size"
+#define MICROMIPS_SIZE_REDUCE_NAME "MicroMips instruction size reduce pass"
 
-STATISTIC(NumReduced, "Number of 32-bit instructions reduced to 16-bit ones");
+STATISTIC(NumReduced, "Number of instructions reduced (32-bit to 16-bit ones, "
+                      "or two instructions into one)");
 
 namespace {
 
@@ -35,12 +36,15 @@
   OT_Operands02,  ///< Transfer operands 0 and 2
   OT_Operand2,    ///< Transfer just operand 2
   OT_OperandsXOR, ///< Transfer operands for XOR16
+  OT_OperandsLwp, ///< Transfer operands for LWP
+  OT_OperandsSwp, ///< Transfer operands for SWP
 };
 
 /// Reduction type
 // TODO: Will be extended when additional optimizations are added
 enum ReduceType {
-  RT_OneInstr ///< Reduce one instruction into a smaller instruction
+  RT_TwoInstr, ///< Reduce two instructions into one instruction
+  RT_OneInstr  ///< Reduce one instruction into a smaller instruction
 };
 
 // Information about immediate field restrictions
@@ -76,21 +80,22 @@
   unsigned NarrowOpc; ///< Narrow opcode
 };
 
+typedef struct ReduceEntryFunArgs ReduceEntryFunArgs;
+
 /// ReduceTable - A static table with information on mapping from wide
 /// opcodes to narrow
 struct ReduceEntry {
 
   enum ReduceType eRType; ///< Reduction type
   bool (*ReduceFunction)(
-      MachineInstr *MI,
-      const ReduceEntry &Entry); ///< Pointer to reduce function
-  struct OpCodes Ops;            ///< All relevant OpCodes
-  struct OpInfo OpInf;           ///< Characteristics of operands
-  struct ImmField Imm;           ///< Characteristics of immediate field
+      ReduceEntryFunArgs *Arguments); ///< Pointer to reduce function
+  struct OpCodes Ops;                 ///< All relevant OpCodes
+  struct OpInfo OpInf;                ///< Characteristics of operands
+  struct ImmField Imm;                ///< Characteristics of immediate field
 
   ReduceEntry(enum ReduceType RType, struct OpCodes Op,
-              bool (*F)(MachineInstr *MI, const ReduceEntry &Entry),
-              struct OpInfo OpInf, struct ImmField Imm)
+              bool (*F)(ReduceEntryFunArgs *Arguments), struct OpInfo OpInf,
+              struct ImmField Imm)
       : eRType(RType), ReduceFunction(F), Ops(Op), OpInf(OpInf), Imm(Imm) {}
 
   unsigned NarrowOpc() const { return Ops.NarrowOpc; }
@@ -113,6 +118,22 @@
   }
 };
 
+// Arguments bundled into one object for a ReduceEntry::ReduceFunction call
+struct ReduceEntryFunArgs {
+  MachineInstr *MI;         // Instruction offered to the reduce function
+  const ReduceEntry &Entry; // Table entry whose ReduceFunction is called
+  MachineBasicBlock::instr_iterator
+      &NextMII; // Iterator to next instruction in block (callee may advance)
+  const MachineBasicBlock::instr_iterator &E; // End iterator
+
+  ReduceEntryFunArgs(MachineInstr *argMI, const ReduceEntry &argEntry,
+                     MachineBasicBlock::instr_iterator &argNextMII,
+                     const MachineBasicBlock::instr_iterator &argE)
+      : MI(argMI), Entry(argEntry), NextMII(argNextMII), E(argE) {}
+};
+
+typedef llvm::SmallVector<ReduceEntry, 32> ReduceEntryVector;
+
 class MicroMipsSizeReduce : public MachineFunctionPass {
 public:
   static char ID;
@@ -132,50 +153,58 @@
   bool ReduceMBB(MachineBasicBlock &MBB);
 
   /// Attempts to reduce MI, returns true on success.
-  bool ReduceMI(const MachineBasicBlock::instr_iterator &MII);
+  bool ReduceMI(const MachineBasicBlock::instr_iterator &MII,
+                MachineBasicBlock::instr_iterator &NextMII,
+                const MachineBasicBlock::instr_iterator &E);
 
   // Attempts to reduce LW/SW instruction into LWSP/SWSP,
   // returns true on success.
-  static bool ReduceXWtoXWSP(MachineInstr *MI, const ReduceEntry &Entry);
+  static bool ReduceXWtoXWSP(ReduceEntryFunArgs *Arguments);
+
+  // Attempts to reduce two LW/SW instructions into LWP/SWP instruction,
+  // returns true on success.
+  static bool ReduceXWtoXWP(ReduceEntryFunArgs *Arguments);
 
   // Attempts to reduce LBU/LHU instruction into LBU16/LHU16,
   // returns true on success.
-  static bool ReduceLXUtoLXU16(MachineInstr *MI, const ReduceEntry &Entry);
+  static bool ReduceLXUtoLXU16(ReduceEntryFunArgs *Arguments);
 
   // Attempts to reduce SB/SH instruction into SB16/SH16,
   // returns true on success.
-  static bool ReduceSXtoSX16(MachineInstr *MI, const ReduceEntry &Entry);
+  static bool ReduceSXtoSX16(ReduceEntryFunArgs *Arguments);
 
   // Attempts to reduce arithmetic instructions, returns true on success.
-  static bool ReduceArithmeticInstructions(MachineInstr *MI,
-                                           const ReduceEntry &Entry);
+  static bool ReduceArithmeticInstructions(ReduceEntryFunArgs *Arguments);
 
   // Attempts to reduce ADDIU into ADDIUSP instruction,
   // returns true on success.
-  static bool ReduceADDIUToADDIUSP(MachineInstr *MI, const ReduceEntry &Entry);
+  static bool ReduceADDIUToADDIUSP(ReduceEntryFunArgs *Arguments);
 
   // Attempts to reduce ADDIU into ADDIUR1SP instruction,
   // returns true on success.
-  static bool ReduceADDIUToADDIUR1SP(MachineInstr *MI,
-                                     const ReduceEntry &Entry);
+  static bool ReduceADDIUToADDIUR1SP(ReduceEntryFunArgs *Arguments);
 
   // Attempts to reduce XOR into XOR16 instruction,
   // returns true on success.
-  static bool ReduceXORtoXOR16(MachineInstr *MI, const ReduceEntry &Entry);
+  static bool ReduceXORtoXOR16(ReduceEntryFunArgs *Arguments);
 
-  // Changes opcode of an instruction.
-  static bool ReplaceInstruction(MachineInstr *MI, const ReduceEntry &Entry);
+  // Changes opcode of an instruction, replaces an instruction with a
+  // new one, or replaces two instructions with a new instruction
+  // depending on their order i.e. if these are consecutive forward
+  // or consecutive backward
+  static bool ReplaceInstruction(MachineInstr *MI, const ReduceEntry &Entry,
+                                 MachineInstr *MI2 = nullptr,
+                                 bool ConsecutiveForward = true);
 
   // Table with transformation rules for each instruction.
-  static llvm::SmallVector<ReduceEntry, 16> ReduceTable;
+  static ReduceEntryVector ReduceTable;
 };
 
-char MicroMipsSizeReduce::ID = 0;
 const MipsInstrInfo *MicroMipsSizeReduce::MipsII;
 
 // This table must be sorted by WideOpc as a main criterion and
 // ReduceType as a sub-criterion (when wide opcodes are the same).
-llvm::SmallVector<ReduceEntry, 16> MicroMipsSizeReduce::ReduceTable = {
+ReduceEntryVector MicroMipsSizeReduce::ReduceTable = {
 
     // ReduceType, OpCodes, ReduceFunction,
     // OpInfo(TransferOperands),
@@ -204,8 +233,12 @@
      OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)},
     {RT_OneInstr, OpCodes(Mips::LHu_MM, Mips::LHU16_MM), ReduceLXUtoLXU16,
      OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)},
+    {RT_TwoInstr, OpCodes(Mips::LW, Mips::LWP_MM), ReduceXWtoXWP,
+     OpInfo(OT_OperandsLwp), ImmField(0, -2048, 2048, 2)},
     {RT_OneInstr, OpCodes(Mips::LW, Mips::LWSP_MM), ReduceXWtoXWSP,
      OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
+    {RT_TwoInstr, OpCodes(Mips::LW_MM, Mips::LWP_MM), ReduceXWtoXWP,
+     OpInfo(OT_OperandsLwp), ImmField(0, -2048, 2048, 2)},
     {RT_OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceXWtoXWSP,
      OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
     {RT_OneInstr, OpCodes(Mips::SB, Mips::SB16_MM), ReduceSXtoSX16,
@@ -222,15 +255,25 @@
     {RT_OneInstr, OpCodes(Mips::SUBu_MM, Mips::SUBU16_MM),
      ReduceArithmeticInstructions, OpInfo(OT_OperandsAll),
      ImmField(0, 0, 0, -1)},
+    {RT_TwoInstr, OpCodes(Mips::SW, Mips::SWP_MM), ReduceXWtoXWP,
+     OpInfo(OT_OperandsSwp), ImmField(0, -2048, 2048, 2)},
     {RT_OneInstr, OpCodes(Mips::SW, Mips::SWSP_MM), ReduceXWtoXWSP,
      OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
+    {RT_TwoInstr, OpCodes(Mips::SW_MM, Mips::SWP_MM), ReduceXWtoXWP,
+     OpInfo(OT_OperandsSwp), ImmField(0, -2048, 2048, 2)},
     {RT_OneInstr, OpCodes(Mips::SW_MM, Mips::SWSP_MM), ReduceXWtoXWSP,
      OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
     {RT_OneInstr, OpCodes(Mips::XOR, Mips::XOR16_MM), ReduceXORtoXOR16,
      OpInfo(OT_OperandsXOR), ImmField(0, 0, 0, -1)},
     {RT_OneInstr, OpCodes(Mips::XOR_MM, Mips::XOR16_MM), ReduceXORtoXOR16,
      OpInfo(OT_OperandsXOR), ImmField(0, 0, 0, -1)}};
-} // namespace
+
+  char MicroMipsSizeReduce::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(MicroMipsSizeReduce, DEBUG_TYPE, MICROMIPS_SIZE_REDUCE_NAME,
+                false, false)
 
 // Returns true if the machine operand MO is register SP.
 static bool IsSP(const MachineOperand &MO) {
@@ -297,37 +340,99 @@
   return true;
 }
 
+// Returns true if MI is a candidate for folding into an lwp/swp instruction
+static bool CheckXWPInstr(MachineInstr *MI, bool ReduceToLwp,
+                          const ReduceEntry &Entry) {
+
+  // The opcode must be a word load when forming LWP, a word store for SWP.
+  const unsigned Opc = MI->getOpcode();
+  const bool IsLoad = Opc == Mips::LW || Opc == Mips::LW_MM;
+  const bool IsStore = Opc == Mips::SW || Opc == Mips::SW_MM;
+  if (ReduceToLwp ? !IsLoad : !IsStore)
+    return false;
+
+  // RA is rejected as the transferred register (operand 0).
+  const unsigned Reg = MI->getOperand(0).getReg();
+  if (Reg == Mips::RA)
+    return false;
+
+  // The immediate must pass the ImmInRange check for this table entry.
+  if (!ImmInRange(MI, Entry))
+    return false;
+
+  // A load that overwrites its own base register (operand 1) is rejected.
+  const bool ClobbersBase = ReduceToLwp && Reg == MI->getOperand(1).getReg();
+  return !ClobbersBase;
+}
+
+// Returns true if Reg2 is the entry right after Reg1 in the list below
+static bool ConsecutiveRegisters(unsigned Reg1, unsigned Reg2) {
+  static const SmallVector<unsigned, 31> Registers = {
+      Mips::AT, Mips::V0, Mips::V1, Mips::A0, Mips::A1, Mips::A2, Mips::A3,
+      Mips::T0, Mips::T1, Mips::T2, Mips::T3, Mips::T4, Mips::T5, Mips::T6,
+      Mips::T7, Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5,
+      Mips::S6, Mips::S7, Mips::T8, Mips::T9, Mips::K0, Mips::K1, Mips::GP,
+      Mips::SP, Mips::FP, Mips::RA};
+
+  // Find Reg1 among all entries but the last; a match decides the answer,
+  // since only the entry right after it can be its pair.
+  for (size_t Idx = 0, End = Registers.size() - 1; Idx != End; ++Idx) {
+    if (Registers[Idx] != Reg1)
+      continue;
+    return Registers[Idx + 1] == Reg2;
+  }
+  // Reg1 is RA (the last entry) or is not in the list at all.
+  return false;
+}
+
+// Returns true if MI2 follows MI1: offset +4 and the next register
+static bool ConsecutiveInstr(MachineInstr *MI1, MachineInstr *MI2) {
+
+  // GetImm must succeed on operand 2 of both instructions.
+  int64_t LoOff, HiOff;
+  if (!GetImm(MI1, 2, LoOff) || !GetImm(MI2, 2, HiOff))
+    return false;
+
+  const unsigned LoReg = MI1->getOperand(0).getReg();
+  const unsigned HiReg = MI2->getOperand(0).getReg();
+
+  // MI2 must sit exactly four bytes above MI1 and use the following register.
+  return LoOff == HiOff - 4 && ConsecutiveRegisters(LoReg, HiReg);
+}
+
 MicroMipsSizeReduce::MicroMipsSizeReduce() : MachineFunctionPass(ID) {}
 
-bool MicroMipsSizeReduce::ReduceMI(
-    const MachineBasicBlock::instr_iterator &MII) {
+bool MicroMipsSizeReduce::ReduceMI(const MachineBasicBlock::instr_iterator &MII,
+                                   MachineBasicBlock::instr_iterator &NextMII,
+                                   const MachineBasicBlock::instr_iterator &E) {
 
   MachineInstr *MI = &*MII;
   unsigned Opcode = MI->getOpcode();
 
   // Search the table.
-  llvm::SmallVector<ReduceEntry, 16>::const_iterator Start =
-      std::begin(ReduceTable);
-  llvm::SmallVector<ReduceEntry, 16>::const_iterator End =
-      std::end(ReduceTable);
+  ReduceEntryVector::const_iterator Start = std::begin(ReduceTable);
+  ReduceEntryVector::const_iterator End = std::end(ReduceTable);
 
-  std::pair<llvm::SmallVector<ReduceEntry, 16>::const_iterator,
-            llvm::SmallVector<ReduceEntry, 16>::const_iterator>
+  std::pair<ReduceEntryVector::const_iterator,
+            ReduceEntryVector::const_iterator>
       Range = std::equal_range(Start, End, Opcode);
 
   if (Range.first == Range.second)
     return false;
 
-  for (llvm::SmallVector<ReduceEntry, 16>::const_iterator Entry = Range.first;
-       Entry != Range.second; ++Entry)
-    if (((*Entry).ReduceFunction)(&(*MII), *Entry))
+  for (ReduceEntryVector::const_iterator Entry = Range.first;
+       Entry != Range.second; ++Entry) {
+    ReduceEntryFunArgs Arguments(&(*MII), *Entry, NextMII, E);
+    if (((*Entry).ReduceFunction)(&Arguments))
       return true;
-
+  }
   return false;
 }
 
-bool MicroMipsSizeReduce::ReduceXWtoXWSP(MachineInstr *MI,
-                                         const ReduceEntry &Entry) {
+bool MicroMipsSizeReduce::ReduceXWtoXWSP(ReduceEntryFunArgs *Arguments) {
+
+  MachineInstr *MI = Arguments->MI;
+  const ReduceEntry &Entry = Arguments->Entry;
 
   if (!ImmInRange(MI, Entry))
     return false;
@@ -338,8 +443,53 @@
   return ReplaceInstruction(MI, Entry);
 }
 
+// Tries to merge the current LW/SW with the instruction that follows it into
+// one LWP/SWP. Both must pass CheckXWPInstr, share a base register and be
+// consecutive in either order. Once a pair is found, NextMII is advanced
+// past the second instruction and ReplaceInstruction's result is returned.
+bool MicroMipsSizeReduce::ReduceXWtoXWP(ReduceEntryFunArgs *Arguments) {
+
+  MachineBasicBlock::instr_iterator &Following = Arguments->NextMII;
+  const MachineBasicBlock::instr_iterator &BlockEnd = Arguments->E;
+
+  // Pairing needs a second instruction right after the current one.
+  if (Following == BlockEnd)
+    return false;
+
+  const ReduceEntry &TableEntry = Arguments->Entry;
+  MachineInstr *First = Arguments->MI;
+  MachineInstr *Second = &*Following;
+
+  // A load opcode on the first instruction selects LWP; otherwise SWP.
+  const unsigned FirstOpc = First->getOpcode();
+  const bool WantLwp = FirstOpc == Mips::LW || FirstOpc == Mips::LW_MM;
+
+  // Each instruction must qualify on its own.
+  if (!CheckXWPInstr(First, WantLwp, TableEntry) ||
+      !CheckXWPInstr(Second, WantLwp, TableEntry))
+    return false;
+
+  // Both accesses have to go through the same base register (operand 1).
+  if (First->getOperand(1).getReg() != Second->getOperand(1).getReg())
+    return false;
+
+  // Accept the pair in either order: First/Second or Second/First.
+  const bool InOrder = ConsecutiveInstr(First, Second);
+  const bool Reversed = ConsecutiveInstr(Second, First);
+  if (!InOrder && !Reversed)
+    return false;
+
+  // Step past Second before the pair is replaced; ReplaceInstruction erases
+  // both instructions, so the shared iterator must not keep pointing at it.
+  Following = std::next(Following);
+  return ReplaceInstruction(First, TableEntry, Second, InOrder);
+}
+
 bool MicroMipsSizeReduce::ReduceArithmeticInstructions(
-    MachineInstr *MI, const ReduceEntry &Entry) {
+    ReduceEntryFunArgs *Arguments) {
+
+  MachineInstr *MI = Arguments->MI;
+  const ReduceEntry &Entry = Arguments->Entry;
 
   if (!isMMThreeBitGPRegister(MI->getOperand(0)) ||
       !isMMThreeBitGPRegister(MI->getOperand(1)) ||
@@ -349,8 +499,11 @@
   return ReplaceInstruction(MI, Entry);
 }
 
-bool MicroMipsSizeReduce::ReduceADDIUToADDIUR1SP(MachineInstr *MI,
-                                                 const ReduceEntry &Entry) {
+bool MicroMipsSizeReduce::ReduceADDIUToADDIUR1SP(
+    ReduceEntryFunArgs *Arguments) {
+
+  MachineInstr *MI = Arguments->MI;
+  const ReduceEntry &Entry = Arguments->Entry;
 
   if (!ImmInRange(MI, Entry))
     return false;
@@ -361,8 +514,10 @@
   return ReplaceInstruction(MI, Entry);
 }
 
-bool MicroMipsSizeReduce::ReduceADDIUToADDIUSP(MachineInstr *MI,
-                                               const ReduceEntry &Entry) {
+bool MicroMipsSizeReduce::ReduceADDIUToADDIUSP(ReduceEntryFunArgs *Arguments) {
+
+  MachineInstr *MI = Arguments->MI;
+  const ReduceEntry &Entry = Arguments->Entry;
 
   int64_t ImmValue;
   if (!GetImm(MI, Entry.ImmField(), ImmValue))
@@ -377,8 +532,10 @@
   return ReplaceInstruction(MI, Entry);
 }
 
-bool MicroMipsSizeReduce::ReduceLXUtoLXU16(MachineInstr *MI,
-                                           const ReduceEntry &Entry) {
+bool MicroMipsSizeReduce::ReduceLXUtoLXU16(ReduceEntryFunArgs *Arguments) {
+
+  MachineInstr *MI = Arguments->MI;
+  const ReduceEntry &Entry = Arguments->Entry;
 
   if (!ImmInRange(MI, Entry))
     return false;
@@ -390,8 +547,10 @@
   return ReplaceInstruction(MI, Entry);
 }
 
-bool MicroMipsSizeReduce::ReduceSXtoSX16(MachineInstr *MI,
-                                         const ReduceEntry &Entry) {
+bool MicroMipsSizeReduce::ReduceSXtoSX16(ReduceEntryFunArgs *Arguments) {
+
+  MachineInstr *MI = Arguments->MI;
+  const ReduceEntry &Entry = Arguments->Entry;
 
   if (!ImmInRange(MI, Entry))
     return false;
@@ -403,8 +562,11 @@
   return ReplaceInstruction(MI, Entry);
 }
 
-bool MicroMipsSizeReduce::ReduceXORtoXOR16(MachineInstr *MI,
-                                           const ReduceEntry &Entry) {
+bool MicroMipsSizeReduce::ReduceXORtoXOR16(ReduceEntryFunArgs *Arguments) {
+
+  MachineInstr *MI = Arguments->MI;
+  const ReduceEntry &Entry = Arguments->Entry;
+
   if (!isMMThreeBitGPRegister(MI->getOperand(0)) ||
       !isMMThreeBitGPRegister(MI->getOperand(1)) ||
       !isMMThreeBitGPRegister(MI->getOperand(2)))
@@ -433,14 +595,16 @@
       continue;
 
     // Try to reduce 32-bit instruction into 16-bit instruction
-    Modified |= ReduceMI(MII);
+    Modified |= ReduceMI(MII, NextMII, E);
   }
 
   return Modified;
 }
 
 bool MicroMipsSizeReduce::ReplaceInstruction(MachineInstr *MI,
-                                             const ReduceEntry &Entry) {
+                                             const ReduceEntry &Entry,
+                                             MachineInstr *MI2,
+                                             bool ConsecutiveForward) {
 
   enum OperandTransfer OpTransfer = Entry.TransferOperands();
 
@@ -477,6 +641,41 @@
       }
       break;
     }
+    case OT_OperandsLwp:
+    case OT_OperandsSwp: {
+      if (ConsecutiveForward) {
+        MIB.add(MI->getOperand(0));
+        if(OpTransfer == OT_OperandsSwp)
+          MIB.add(MI2->getOperand(0));
+        else // OT_OperandsLwp
+          // FIXME: This should be MIB.add(MI2->getOperand(0));
+          // However, TableGen counts regpair as one output operand,
+          // and that introduces machine verifier error for lwp instruction.
+          // Setting the second register as undef, bypasses this error.
+          // This should not introduce bugs. The delay slot filler is
+          // aware that it should not put lwp/swp instructions in delay slots.
+          MIB.addReg(MI2->getOperand(0).getReg(), RegState::Undef);
+        MIB.add(MI->getOperand(1));
+        MIB.add(MI->getOperand(2));
+      } else { // consecutive backward
+        MIB.add(MI2->getOperand(0));
+        if(OpTransfer == OT_OperandsSwp)
+          MIB.add(MI->getOperand(0));
+        else // OT_OperandsLwp
+          // FIXME: This should be MIB.add(MI->getOperand(0));
+          // See the comment above.
+          MIB.addReg(MI->getOperand(0).getReg(), RegState::Undef);
+        MIB.add(MI2->getOperand(1));
+        MIB.add(MI2->getOperand(2));
+      }
+
+      DEBUG(dbgs() << "and converting 32-bit: " << *MI2
+                   << "       to: " << *MIB);
+
+      MBB.erase_instr(MI);
+      MBB.erase_instr(MI2);
+      return true;
+    }
     default:
       llvm_unreachable("Unknown operand transfer!");
     }
@@ -511,6 +710,6 @@
 }
 
 /// Returns an instance of the MicroMips size reduction pass.
-FunctionPass *llvm::createMicroMipsSizeReductionPass() {
+FunctionPass *llvm::createMicroMipsSizeReducePass() {
   return new MicroMipsSizeReduce();
 }
Index: lib/Target/Mips/Mips.h
===================================================================
--- lib/Target/Mips/Mips.h
+++ lib/Target/Mips/Mips.h
@@ -22,6 +22,7 @@
   class MipsTargetMachine;
   class ModulePass;
   class FunctionPass;
+  class PassRegistry;
 
   ModulePass *createMipsOs16Pass();
   ModulePass *createMips16HardFloatPass();
@@ -32,7 +33,9 @@
   FunctionPass *createMipsHazardSchedule();
   FunctionPass *createMipsLongBranchPass();
   FunctionPass *createMipsConstantIslandPass();
-  FunctionPass *createMicroMipsSizeReductionPass();
+  FunctionPass *createMicroMipsSizeReducePass();
+
+  void initializeMicroMipsSizeReducePass(PassRegistry &);
 } // end namespace llvm;
 
 #endif
Index: lib/Target/Mips/MipsDelaySlotFiller.cpp
===================================================================
--- lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -720,6 +720,10 @@
         (Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
          Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
       continue;
+     // Instructions LWP/SWP should not be in a delay slot as that
+     // results in unpredictable behaviour
+     if (InMicroMipsMode && (Opcode == Mips::LWP_MM || Opcode == Mips::SWP_MM))
+       continue;
 
     Filler = CurrI;
     return true;
Index: lib/Target/Mips/MipsTargetMachine.cpp
===================================================================
--- lib/Target/Mips/MipsTargetMachine.cpp
+++ lib/Target/Mips/MipsTargetMachine.cpp
@@ -46,6 +46,10 @@
   RegisterTargetMachine<MipselTargetMachine> Y(getTheMipselTarget());
   RegisterTargetMachine<MipsebTargetMachine> A(getTheMips64Target());
   RegisterTargetMachine<MipselTargetMachine> B(getTheMips64elTarget());
+
+  PassRegistry &Registry = *PassRegistry::getPassRegistry();
+  initializeMicroMipsSizeReducePass(Registry);
+
 }
 
 static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -275,7 +279,7 @@
 // machine code is emitted. return true if -print-machineinstrs should
 // print out the code after the passes.
 void MipsPassConfig::addPreEmitPass() {
-  addPass(createMicroMipsSizeReductionPass());
+  addPass(createMicroMipsSizeReducePass());
 
   // The delay slot filler and the long branch passes can potientially create
   // forbidden slot/ hazards for MIPSR6 which the hazard schedule pass will
Index: test/CodeGen/Mips/llvm-ir/and.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/and.ll
+++ test/CodeGen/Mips/llvm-ir/and.ll
@@ -341,8 +341,7 @@
 ;
 ; MM32R3-LABEL: and_i128:
 ; MM32R3:       # %bb.0: # %entry
-; MM32R3-NEXT:    lw $3, 20($sp)
-; MM32R3-NEXT:    lw $2, 16($sp)
+; MM32R3-NEXT:    lwp $2, 16($sp)
 ; MM32R3-NEXT:    and16 $2, $4
 ; MM32R3-NEXT:    and16 $3, $5
 ; MM32R3-NEXT:    lw $4, 24($sp)
Index: test/CodeGen/Mips/llvm-ir/ashr.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/ashr.ll
+++ test/CodeGen/Mips/llvm-ir/ashr.ll
@@ -791,8 +791,7 @@
 ; MMR3:       # %bb.0: # %entry
 ; MMR3-NEXT:    addiusp -48
 ; MMR3-NEXT:    .cfi_def_cfa_offset 48
-; MMR3-NEXT:    sw $17, 44($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    sw $16, 40($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    swp $16, 40($sp)
 ; MMR3-NEXT:    .cfi_offset 17, -4
 ; MMR3-NEXT:    .cfi_offset 16, -8
 ; MMR3-NEXT:    move $8, $7
@@ -870,8 +869,7 @@
 ; MMR3-NEXT:    movn $2, $11, $10
 ; MMR3-NEXT:    move $3, $8
 ; MMR3-NEXT:    move $4, $1
-; MMR3-NEXT:    lw $16, 40($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    lw $17, 44($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    lwp $16, 40($sp)
 ; MMR3-NEXT:    addiusp 48
 ; MMR3-NEXT:    jrc $ra
 ;
Index: test/CodeGen/Mips/llvm-ir/lshr.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/lshr.ll
+++ test/CodeGen/Mips/llvm-ir/lshr.ll
@@ -818,8 +818,7 @@
 ; MMR3:       # %bb.0: # %entry
 ; MMR3-NEXT:    addiusp -48
 ; MMR3-NEXT:    .cfi_def_cfa_offset 48
-; MMR3-NEXT:    sw $17, 44($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    sw $16, 40($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    swp $16, 40($sp)
 ; MMR3-NEXT:    .cfi_offset 17, -4
 ; MMR3-NEXT:    .cfi_offset 16, -8
 ; MMR3-NEXT:    move $8, $7
@@ -899,8 +898,7 @@
 ; MMR3-NEXT:    li16 $4, 0
 ; MMR3-NEXT:    movz $2, $4, $10
 ; MMR3-NEXT:    move $4, $1
-; MMR3-NEXT:    lw $16, 40($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    lw $17, 44($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    lwp $16, 40($sp)
 ; MMR3-NEXT:    addiusp 48
 ; MMR3-NEXT:    jrc $ra
 ;
Index: test/CodeGen/Mips/llvm-ir/or.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/or.ll
+++ test/CodeGen/Mips/llvm-ir/or.ll
@@ -182,8 +182,7 @@
 ;
 ; MM32-LABEL: or_i128:
 ; MM32:       # %bb.0: # %entry
-; MM32-NEXT:    lw $3, 20($sp)
-; MM32-NEXT:    lw $2, 16($sp)
+; MM32-NEXT:    lwp $2, 16($sp)
 ; MM32-NEXT:    or16 $2, $4
 ; MM32-NEXT:    or16 $3, $5
 ; MM32-NEXT:    lw $4, 24($sp)
Index: test/CodeGen/Mips/llvm-ir/shl.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/shl.ll
+++ test/CodeGen/Mips/llvm-ir/shl.ll
@@ -847,8 +847,7 @@
 ; MMR3:       # %bb.0: # %entry
 ; MMR3-NEXT:    addiusp -48
 ; MMR3-NEXT:    .cfi_def_cfa_offset 48
-; MMR3-NEXT:    sw $17, 44($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    sw $16, 40($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    swp $16, 40($sp)
 ; MMR3-NEXT:    .cfi_offset 17, -4
 ; MMR3-NEXT:    .cfi_offset 16, -8
 ; MMR3-NEXT:    sw $7, 8($sp) # 4-byte Folded Spill
@@ -930,8 +929,7 @@
 ; MMR3-NEXT:    move $2, $3
 ; MMR3-NEXT:    move $3, $8
 ; MMR3-NEXT:    move $5, $6
-; MMR3-NEXT:    lw $16, 40($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    lw $17, 44($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    lwp $16, 40($sp)
 ; MMR3-NEXT:    addiusp 48
 ; MMR3-NEXT:    jrc $ra
 ;
Index: test/CodeGen/Mips/llvm-ir/xor.ll
===================================================================
--- test/CodeGen/Mips/llvm-ir/xor.ll
+++ test/CodeGen/Mips/llvm-ir/xor.ll
@@ -339,8 +339,7 @@
 ;
 ; MM32R3-LABEL: xor_i128:
 ; MM32R3:       # %bb.0: # %entry
-; MM32R3-NEXT:    lw $3, 20($sp)
-; MM32R3-NEXT:    lw $2, 16($sp)
+; MM32R3-NEXT:    lwp $2, 16($sp)
 ; MM32R3-NEXT:    xor16 $2, $4
 ; MM32R3-NEXT:    xor16 $3, $5
 ; MM32R3-NEXT:    lw $4, 24($sp)
Index: test/CodeGen/Mips/micromips-sizereduction/micromips-lwp-swp.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Mips/micromips-sizereduction/micromips-lwp-swp.ll
@@ -0,0 +1,15 @@
+; RUN: llc -march=mipsel -mattr=+micromips -mcpu=mips32r2 -verify-machineinstrs \
+; RUN:     < %s | FileCheck %s
+
+; Function Attrs: nounwind
+define i32 @fun(i32* %adr, i32 %val) {
+entry:
+; CHECK: swp
+; CHECK: lwp
+  %call1 =  call i32* @fun1()
+  store i32 %val, i32* %adr, align 4
+  ret i32 0
+}
+
+declare i32* @fun1()
+
Index: test/CodeGen/Mips/micromips-sizereduction/micromips-lwp-swp.mir
===================================================================
--- /dev/null
+++ test/CodeGen/Mips/micromips-sizereduction/micromips-lwp-swp.mir
@@ -0,0 +1,84 @@
+# RUN: llc -march=mipsel -mattr=+micromips -mcpu=mips32r2 \
+# RUN:     -verify-machineinstrs -run-pass micromips-reduce-size \
+# RUN:      %s -o - | FileCheck %s
+
+--- |
+  ; ModuleID = '<stdin>'
+  source_filename = "<stdin>"
+  target datalayout = "e-m:m-p:32:32-i8:8:32-i16:16:32-i64:64-n32-S64"
+
+  define i32 @fun(i32* %adr, i32 %val) #0 {
+  entry:
+    %call1 = call i32* @fun1()
+    ret i32 %val
+  }
+
+  declare i32* @fun1() #0
+
+  ; Function Attrs: nounwind
+  declare void @llvm.stackprotector(i8*, i8**) #1
+
+  attributes #0 = { "target-cpu"="mips32r2" "target-features"="+micromips" }
+  attributes #1 = { nounwind }
+
+...
+---
+# CHECK-LABEL: name: fun
+# CHECK-NOT: SWP_MM
+# CHECK-NOT: LWP_MM
+name:            fun
+alignment:       2
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+liveins:
+  - { reg: '%a1', virtual-reg: '' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       24
+  offsetAdjustment: 0
+  maxAlignment:    4
+  adjustsStack:    true
+  hasCalls:        true
+  stackProtector:  ''
+  maxCallFrameSize: 16
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:
+stack:
+  - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4,
+      stack-id: 0, callee-saved-register: '%ra', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+  - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4,
+      stack-id: 0, callee-saved-register: '%s0', callee-saved-restored: true,
+      di-variable: '', di-expression: '', di-location: '' }
+constants:
+body:             |
+  bb.0.entry:
+    liveins: %a1, %ra, %s0
+
+    %sp = ADDiu %sp, -24
+    CFI_INSTRUCTION def_cfa_offset 24
+    SW killed %ra, %sp, 20 :: (store 4 into %stack.0)
+    SW killed %s0, %sp, 16 :: (store 4 into %stack.1)
+    CFI_INSTRUCTION offset %ra_64, -4
+    CFI_INSTRUCTION offset %s0_64, -8
+    %s0 = MOVE16_MM %a1
+    JAL @fun1, csr_o32, implicit-def dead %ra, implicit-def %sp, implicit-def dead %v0
+    %v0 = MOVE16_MM killed %s0
+    %s0 = LW %sp, 16 :: (load 4 from %stack.1)
+    %ra = LW %sp, 20 :: (load 4 from %stack.0)
+    %sp = ADDiu %sp, 24
+    PseudoReturn undef %ra, implicit killed %v0
+
+...
+