Index: lib/Target/Mips/CMakeLists.txt
===================================================================
--- lib/Target/Mips/CMakeLists.txt
+++ lib/Target/Mips/CMakeLists.txt
@@ -47,6 +47,7 @@
   MipsSubtarget.cpp
   MipsTargetMachine.cpp
   MipsTargetObjectFile.cpp
+  MicroMips32SizeReduction.cpp
   )
 
 add_subdirectory(InstPrinter)
Index: lib/Target/Mips/MicroMips32SizeReduction.cpp
===================================================================
--- /dev/null
+++ lib/Target/Mips/MicroMips32SizeReduction.cpp
@@ -0,0 +1,1075 @@
+//=== MicroMips32SizeReduction.cpp - MicroMips size reduction pass --------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Mips.h"
+#include "MipsInstrInfo.h"
+#include "MipsSubtarget.h"
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <algorithm>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "MicroMips-reduce-size"
+
+STATISTIC(NumReduced, "Number of 32-bit instrs reduced to 16-bit ones");
+STATISTIC(NumTwoOne, "Two instructions reduced to one instruction");
+STATISTIC(NumLwmSwm, "Several lw/sw instr. reduced to one lwm/swm instr.");
+
+namespace {
+
+// Selects which operands of the wide instruction go to the new instruction
+enum opNum { NA, opAll, op01, op02, op12, op2, opLwpSwp };
+
+// Reduction type (enumerator order is the secondary sort key of ReduceEntry):
+// SeveralInstr - several instructions into lwm/swm
+// TwoInstr - two instructions into one
+// OneInstr - 32-bit instruction into 16-bit instruction
+enum ReduceType { SeveralInstr, TwoInstr, OneInstr };
+
+// Information about immediate field restrictions
+struct ImmField {
+  ImmField() : ImmFieldOpperand(-1), Shift(0), LBound(0), HBound(0) {}
+  ImmField(uint8_t sh, int16_t lb, int16_t hb, int8_t immf)
+      : ImmFieldOpperand(immf), Shift(sh), LBound(lb), HBound(hb) {}
+
+  int8_t ImmFieldOpperand; // Index of the immediate operand, -1 if none
+  uint8_t Shift;           // Number of low zero bits the immediate must have
+  int16_t LBound;          // Inclusive low bound of the shifted immediate
+  int16_t HBound;          // Exclusive high bound of the shifted immediate
+};
+
+// Information about operands
+struct OpInfo {
+  OpInfo(enum opNum to, bool snr = false)
+      : TransferOperands(to), SmallerNumRegs(snr) {}
+  OpInfo() : TransferOperands(NA), SmallerNumRegs(false) {}
+
+  enum opNum TransferOperands; // Operands to transfer to the new instruction
+  bool SmallerNumRegs; // In 16 bit instr a smaller num of registers is used
+};
+
+// Information about opcodes
+struct OpCodes {
+  OpCodes(uint16_t wop, uint16_t op2, uint16_t nop)
+      : WideOpc(wop), Opc2(op2), NarrowOpc(nop) {}
+  OpCodes(uint16_t wop, uint16_t nop) : WideOpc(wop), Opc2(0), NarrowOpc(nop) {}
+
+  uint16_t WideOpc;   // Wide opcode; the key ReduceTable is searched by
+  uint16_t Opc2;      // Opcode of a second instruction, 0 when not given
+  uint16_t NarrowOpc; // Narrow (replacement) opcode
+};
+
+/// ReduceEntry - One row of the static ReduceTable, describing how a wide
+/// opcode is mapped to a narrow one
+struct ReduceEntry {
+  // Raw fields; the accessors further down only forward to them
+  enum ReduceType eRType; // Several instr. to one, Two instr. to one, 32 to 16
+  bool (*ReduceFunction)(void *v); // Reduce function; v is a ReduceEntryFA *
+  struct OpCodes Ops;              // All relevant OpCodes
+  struct OpInfo OpInf;             // Characteristics of operands
+  struct ImmField Imm;             // Characteristics of immediate field
+
+  ReduceEntry(enum ReduceType rtype, struct OpCodes op, bool (*f)(void *v),
+              struct OpInfo opinfo, struct ImmField imm)
+      : eRType(rtype), ReduceFunction(f), Ops(op), OpInf(opinfo), Imm(imm) {}
+
+  uint16_t NarrowOpc() const { return Ops.NarrowOpc; }
+  uint16_t WideOpc() const { return Ops.WideOpc; }
+  int16_t LBound() const { return Imm.LBound; }
+  int16_t HBound() const { return Imm.HBound; }
+  uint8_t Shift() const { return Imm.Shift; }
+  int8_t ImmField() const { return Imm.ImmFieldOpperand; }
+  enum opNum TransferOperands() const { return OpInf.TransferOperands; }
+  bool SmallerNumRegs() const { return OpInf.SmallerNumRegs; }
+  enum ReduceType RType() const { return eRType; }
+  uint16_t Opc2() const { return Ops.Opc2; }
+
+  // Compares the wide opcode with a raw opcode; used by std::equal_range
+  bool operator<(const unsigned int r) const { return (WideOpc() < r); }
+
+  // Mirror image of the operator above; used by std::equal_range
+  friend bool operator<(const unsigned int r, const struct ReduceEntry &re) {
+    return (r < re.WideOpc());
+  }
+
+  // Ordering used by std::sort: by wide opcode, ties broken by RType
+  bool operator<(const struct ReduceEntry &r) const {
+    if (WideOpc() == r.WideOpc())
+      return (RType() < r.RType());
+    return (WideOpc() < r.WideOpc());
+  }
+};
+
+// Function arguments for ReduceFunction, passed to it through its void *
+struct ReduceEntryFA {
+  MachineBasicBlock &MBB;                       // Basic block
+  const MachineBasicBlock::instr_iterator &MII; // Starting iterator
+  const MachineBasicBlock::instr_iterator &E;   // End iterator
+  MachineBasicBlock::instr_iterator
+      &NNextMII;            // Out: where the caller resumes iterating
+  const ReduceEntry &Entry; // Entry field
+
+  ReduceEntryFA(MachineBasicBlock &argMBB,
+                const MachineBasicBlock::instr_iterator &argMII,
+                const MachineBasicBlock::instr_iterator &argE,
+                MachineBasicBlock::instr_iterator &argNNextMII,
+                const ReduceEntry &argEntry)
+      : MBB(argMBB), MII(argMII), E(argE), NNextMII(argNNextMII),
+        Entry(argEntry) {}
+};
+
+struct lwmswm {
+  lwmswm() : found(false), MI(nullptr) {}
+  bool found;       // Set when an lw/sw for this register slot was recorded
+  MachineInstr *MI; // The recorded lw/sw, nullptr until one is recorded
+};
+
+class MicroMips32SizeReduce : public MachineFunctionPass {
+public:
+  static char ID;
+  MicroMips32SizeReduce();
+
+  static const MipsInstrInfo *MipsII; // Assigned in runOnMachineFunction
+  const MipsSubtarget *Subtarget;     // Assigned in runOnMachineFunction
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  const char *getPassName() const override {
+    return "MicroMips32 instruction size reduction pass";
+  }
+
+private:
+  /// ReduceMBB - Reduces width of instructions in the specified basic block.
+  bool ReduceMBB(MachineBasicBlock &MBB);
+
+  /// ReduceMI - Attempts to reduce MI, returns true on success.
+  bool ReduceMI(MachineBasicBlock &MBB,
+                const MachineBasicBlock::instr_iterator &MII,
+                const MachineBasicBlock::instr_iterator &E,
+                MachineBasicBlock::instr_iterator &NextMII);
+
+  // Attempts to reduce several instruction into LWM/SWM instruction,
+  // returns true on success
+  static bool ReduceMIToLWMSWM(void *v);
+
+  // Attempts to reduce into LWP/SWP instruction, returns true on success
+  static bool ReduceMIToLwpSwp(void *v);
+
+  // Attempts to reduce SW instruction, returns true on success
+  static bool ReduceSWtoSWSP(void *v);
+  static bool ReduceSWtoSW16(void *v);
+
+  // Attempts to reduce LW instruction, returns true on success
+  static bool ReduceLWtoLWSP(void *v);
+
+  // Attempts to reduce all other Load/Store instructions,
+  // returns true on success
+  static bool ReduceLoadStore(void *v);
+
+  // Inserts the instruction with Entry's narrow opcode in front of MI and
+  // erases MI; returns false if Entry's operand layout is not handled
+  static bool ReplaceInstruction(MachineBasicBlock &MBB, MachineInstr *MI,
+                                 const ReduceEntry &Entry);
+
+  // Inserts the instruction with Entry's narrow opcode in front of sMI and
+  // erases fMI and sMI; flag is true if fMI is the half with the lower offset
+  static bool ReplaceInstruction(MachineBasicBlock &MBB, MachineInstr *fMI,
+                                 MachineInstr *sMI, bool flag,
+                                 const ReduceEntry &Entry);
+
+  // Inserts a LWM/SWM instruction in front of MI, built from the entries of
+  // instrs that are marked as found; does not erase any instruction itself
+  static bool AddInstructionLWMSWM(MachineBasicBlock &MBB, MachineInstr *MI,
+                                   int64_t offset,
+                                   const SmallVector<struct lwmswm, 10> &instrs,
+                                   bool lwm);
+
+  // Deletes instructions that are reduced to LWM/SWM
+  static void DeleteInstructions(MachineBasicBlock &MBB,
+                                 const SmallVector<struct lwmswm, 10> &instrs);
+
+  // Table with transformation rules for each instruction
+  static std::vector<ReduceEntry> ReduceTable;
+};
+
+char MicroMips32SizeReduce::ID = 0;
+const MipsInstrInfo *MicroMips32SizeReduce::MipsII; // Set per function run
+
+std::vector<ReduceEntry> MicroMips32SizeReduce::ReduceTable = {
+    // The pass constructor sorts this table before it is searched.
+    // ReduceType, OpCodes, ReduceFunction,
+    // OpInfo(TransferOperands, SmallerNumRegs=false),
+    // ImmField(Shift, LBound, HBound, ImmFieldOpperand)
+
+    {OneInstr, OpCodes(Mips::LWM32_MM, Mips::LWM16_MM), ReduceLoadStore,
+     OpInfo(opAll), ImmField(2, 0, 16, 2)},
+    {OneInstr, OpCodes(Mips::LWM_MM, Mips::LWM16_MM), ReduceLoadStore,
+     OpInfo(opAll), ImmField(2, 0, 16, 2)},
+    {OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceLWtoLWSP,
+     OpInfo(opAll), ImmField(2, 0, 32, 2)},
+    {OneInstr, OpCodes(Mips::LW, Mips::LWSP_MM), ReduceLWtoLWSP, OpInfo(opAll),
+     ImmField(2, 0, 32, 2)},
+    {OneInstr, OpCodes(Mips::SW_MM, Mips::SW16_MM), ReduceSWtoSW16,
+     OpInfo(opAll), ImmField(2, 0, 16, 2)},
+    {OneInstr, OpCodes(Mips::SW, Mips::SW16_MM), ReduceSWtoSW16, OpInfo(opAll),
+     ImmField(2, 0, 16, 2)},
+    {OneInstr, OpCodes(Mips::SW_MM, Mips::SWSP_MM), ReduceSWtoSWSP,
+     OpInfo(opAll), ImmField(2, 0, 32, 2)},
+    {OneInstr, OpCodes(Mips::SW, Mips::SWSP_MM), ReduceSWtoSWSP, OpInfo(opAll),
+     ImmField(2, 0, 32, 2)},
+    {OneInstr, OpCodes(Mips::SWM32_MM, Mips::SWM16_MM), ReduceLoadStore,
+     OpInfo(opAll), ImmField(2, 0, 16, 2)},
+    {OneInstr, OpCodes(Mips::SWM_MM, Mips::SWM16_MM), ReduceLoadStore,
+     OpInfo(opAll), ImmField(2, 0, 16, 2)},
+
+    // Transfer two instructions into one
+    {TwoInstr, OpCodes(Mips::LW, Mips::LW, Mips::LWP_MM), ReduceMIToLwpSwp,
+     OpInfo(opLwpSwp), ImmField(0, -2048, 2048, 2)},
+    {TwoInstr, OpCodes(Mips::LW, Mips::LW_MM, Mips::LWP_MM), ReduceMIToLwpSwp,
+     OpInfo(opLwpSwp), ImmField(0, -2048, 2048, 2)},
+    {TwoInstr, OpCodes(Mips::LW_MM, Mips::LW, Mips::LWP_MM), ReduceMIToLwpSwp,
+     OpInfo(opLwpSwp), ImmField(0, -2048, 2048, 2)},
+    {TwoInstr, OpCodes(Mips::LW_MM, Mips::LW_MM, Mips::LWP_MM),
+     ReduceMIToLwpSwp, OpInfo(opLwpSwp), ImmField(0, -2048, 2048, 2)},
+
+    {TwoInstr, OpCodes(Mips::SW, Mips::SW, Mips::SWP_MM), ReduceMIToLwpSwp,
+     OpInfo(opLwpSwp), ImmField(0, -2048, 2048, 2)},
+    {TwoInstr, OpCodes(Mips::SW, Mips::SW_MM, Mips::SWP_MM), ReduceMIToLwpSwp,
+     OpInfo(opLwpSwp), ImmField(0, -2048, 2048, 2)},
+    {TwoInstr, OpCodes(Mips::SW_MM, Mips::SW_MM, Mips::SWP_MM),
+     ReduceMIToLwpSwp, OpInfo(opLwpSwp), ImmField(0, -2048, 2048, 2)},
+    {TwoInstr, OpCodes(Mips::SW_MM, Mips::SW, Mips::SWP_MM), ReduceMIToLwpSwp,
+     OpInfo(opLwpSwp), ImmField(0, -2048, 2048, 2)},
+
+    // Transfer several instructions into one
+    {SeveralInstr, OpCodes(Mips::LW, Mips::LWM_MM), ReduceMIToLWMSWM,
+     OpInfo(NA), ImmField(0, -2048, 2048, 2)},
+    {SeveralInstr, OpCodes(Mips::LW_MM, Mips::LWM_MM), ReduceMIToLWMSWM,
+     OpInfo(NA), ImmField(0, -2048, 2048, 2)},
+    {SeveralInstr, OpCodes(Mips::SW, Mips::SWM_MM), ReduceMIToLWMSWM,
+     OpInfo(NA), ImmField(0, -2048, 2048, 2)},
+    {SeveralInstr, OpCodes(Mips::SW_MM, Mips::SWM_MM), ReduceMIToLWMSWM,
+     OpInfo(NA), ImmField(0, -2048, 2048, 2)},
+};
+}
+
+// Tells whether the register Reg is $16, $17, or $2-$7.
+// In symbolic names that is S0, S1, V0, V1 and A0-A3; every other register
+// yields false.
+static bool isMMThreeBitGPRegister(unsigned Reg) {
+  // $16 and $17
+  if (Reg == Mips::S0 || Reg == Mips::S1)
+    return true;
+
+  // $2 and $3
+  if (Reg == Mips::V0 || Reg == Mips::V1)
+    return true;
+
+  // $4-$7
+  const bool IsArgReg = Reg == Mips::A0 || Reg == Mips::A1 ||
+                        Reg == Mips::A2 || Reg == Mips::A3;
+  return IsArgReg;
+}
+
+// Tells whether the register Reg is $0, $17, or $2-$7.
+// In symbolic names that is ZERO, S1, V0, V1 and A0-A3; every other
+// register yields false.
+static bool isMMSourceRegister(unsigned Reg) {
+  // $0 and $17
+  if (Reg == Mips::ZERO || Reg == Mips::S1)
+    return true;
+
+  // $2 and $3
+  if (Reg == Mips::V0 || Reg == Mips::V1)
+    return true;
+
+  // $4-$7
+  const bool IsArgReg = Reg == Mips::A0 || Reg == Mips::A1 ||
+                        Reg == Mips::A2 || Reg == Mips::A3;
+  return IsArgReg;
+}
+// Tells whether the machine operand MO is the register SP.
+// Operands that are not registers yield false.
+static bool IsSP(const MachineOperand &MO) {
+  const bool IsRegOp = MO.isReg();
+  return IsRegOp && MO.getReg() == Mips::SP;
+}
+
+// Operand flavour of the register check: true if MO is a register operand
+// that holds $16, $17, or $2-$7.
+static bool isMMThreeBitGPRegister(const MachineOperand &MO) {
+  const bool IsRegOp = MO.isReg();
+  return IsRegOp && isMMThreeBitGPRegister(MO.getReg());
+}
+
+// Operand flavour of the register check: true if MO is a register operand
+// that holds $0, $17, or $2-$7.
+static bool isMMSourceRegister(const MachineOperand &MO) {
+  const bool IsRegOp = MO.isReg();
+  return IsRegOp && isMMSourceRegister(MO.getReg());
+}
+
+// Reads operand number op of MI as an immediate.
+// Returns true and stores the value in imm if that operand exists and is an
+// immediate; otherwise returns false and leaves imm untouched.
+static bool GetImm(MachineInstr *MI, unsigned op, int64_t &imm) {
+  if (op >= MI->getNumOperands())
+    return false;
+  const MachineOperand &Operand = MI->getOperand(op);
+  if (Operand.isImm())
+    imm = Operand.getImm();
+  return Operand.isImm();
+}
+
+// Reads operand number op of MI as a register into reg; returns false and
+// leaves reg untouched if the operand does not exist or is not a register.
+static bool GetReg(MachineInstr *MI, unsigned op, unsigned &reg) {
+  if (op >= MI->getNumOperands())
+    return false;
+  const MachineOperand &Operand = MI->getOperand(op);
+  if (Operand.isReg())
+    reg = Operand.getReg();
+  return Operand.isReg();
+}
+
+// Tells whether operands Op1 and Op2 of MI are both registers and name the
+// same register. An operand that is missing or is not a register makes the
+// result false.
+static bool EqualRegsInInstr(MachineInstr *MI, uint8_t Op1, uint8_t Op2) {
+  unsigned FirstReg, SecondReg;
+  // The second lookup only happens when the first one succeeded.
+  const bool BothAreRegs =
+      GetReg(MI, Op1, FirstReg) && GetReg(MI, Op2, SecondReg);
+  if (!BothAreRegs)
+    return false;
+  return FirstReg == SecondReg;
+}
+
+// Tells whether Reg2 is the direct successor of Reg1 in the sequence
+// AT, V0, V1, A0-A3, T0-T7, S0-S7, T8, T9, K0, K1, GP, SP, FP, RA.
+// The result is false when Reg1 is not part of the sequence or is its last
+// element (RA), because then there is no successor to compare with.
+static bool ConsecutiveRegisters(unsigned Reg1, unsigned Reg2) {
+  static SmallVector<unsigned, 31> Order = {
+      Mips::AT, Mips::V0, Mips::V1, Mips::A0, Mips::A1, Mips::A2, Mips::A3,
+      Mips::T0, Mips::T1, Mips::T2, Mips::T3, Mips::T4, Mips::T5, Mips::T6,
+      Mips::T7, Mips::S0, Mips::S1, Mips::S2, Mips::S3, Mips::S4, Mips::S5,
+      Mips::S6, Mips::S7, Mips::T8, Mips::T9, Mips::K0, Mips::K1, Mips::GP,
+      Mips::SP, Mips::FP, Mips::RA};
+
+  auto Pos = std::find(Order.begin(), Order.end(), Reg1);
+  if (Pos == Order.end())
+    return false;
+
+  auto Succ = std::next(Pos);
+  return Succ != Order.end() && *Succ == Reg2;
+}
+
+// Returns true if value is an exact multiple of 2^shift (its shift
+// least-significant bits are zero) and value / 2^shift lies in the half-open
+// range [lbound, hbound). Division is used instead of shifting back and
+// forth, so negative values never reach a left shift. Assumes shift < 63.
+static bool InRange(int64_t value, unsigned short shift, int lbound,
+                    int hbound) {
+  const int64_t scale = int64_t(1) << shift;
+  const int64_t scaled = value / scale;
+  return (value % scale == 0) && (scaled >= lbound) && (scaled < hbound);
+}
+
+// Tells whether MI must not sit between two instructions that are to be
+// merged into one.
+// Only call instructions are rejected; every other instruction is accepted.
+// NOTE(review): instructions with other side effects are not filtered here;
+// confirm that the additional checks made by the callers are sufficient.
+static bool NotAppropriateInstruction(MachineInstr *MI) {
+  const bool BlocksMerging = MI->isCall();
+  return BlocksMerging;
+}
+
+// Returns true if MI is a lw instruction, i.e. its opcode is LW or LW_MM
+static bool isLw(MachineInstr *MI) {
+  return (MI->getOpcode() == Mips::LW || MI->getOpcode() == Mips::LW_MM);
+}
+
+// Returns true if MI is a sw instruction, i.e. its opcode is SW or SW_MM
+static bool isSw(MachineInstr *MI) {
+  return (MI->getOpcode() == Mips::SW || MI->getOpcode() == Mips::SW_MM);
+}
+
+// Tells whether the immediate operand of MI that Entry points at satisfies
+// Entry's shift and bound restrictions.
+// The result is also false when that operand does not exist or is not an
+// immediate.
+static bool ImmInRange(MachineInstr *MI, const ReduceEntry &Entry) {
+  int64_t ImmValue;
+  // Entry.ImmField() is the operand index; the value -1 is converted to an
+  // out-of-range index and is therefore rejected by GetImm.
+  const bool HasImm = GetImm(MI, Entry.ImmField(), ImmValue);
+  // The range is only examined once an immediate was actually read.
+  return HasImm &&
+         InRange(ImmValue, Entry.Shift(), Entry.LBound(), Entry.HBound());
+}
+
+// Tells whether MI qualifies as one half of a lwp/swp pair.
+// lwp selects the kind that is looked for: lw when true, sw when false.
+// Requirements, checked in this order:
+//  - MI is of the requested kind
+//  - operand 0 is a register other than RA
+//  - operand 1 is a register
+//  - the immediate selected by Entry is in range
+//  - for a load, operands 0 and 1 are different registers
+static bool CheckLwpSwpInstr(MachineInstr *MI, bool lwp,
+                             const ReduceEntry &Entry) {
+  const bool KindMatches = lwp ? isLw(MI) : isSw(MI);
+  if (!KindMatches)
+    return false;
+
+  unsigned Reg;
+  if (!GetReg(MI, 0, Reg) || Reg == Mips::RA)
+    return false;
+
+  if (!GetReg(MI, 1, Reg) || !ImmInRange(MI, Entry))
+    return false;
+
+  // A load whose destination is also its base register is rejected.
+  return !(lwp && EqualRegsInInstr(MI, 0, 1));
+}
+
+// Tells whether sMI continues fMI: the immediate in operand 2 of sMI is
+// exactly 4 larger than that of fMI, and the register in operand 0 of sMI
+// directly follows that of fMI according to ConsecutiveRegisters.
+// Returns false if any of these operands has an unexpected kind.
+static bool consecutiveInstr(MachineInstr *fMI, MachineInstr *sMI) {
+  int64_t FirstOff, SecondOff;
+  if (!GetImm(fMI, 2, FirstOff) || !GetImm(sMI, 2, SecondOff))
+    return false;
+
+  unsigned FirstReg, SecondReg;
+  if (!GetReg(fMI, 0, FirstReg) || !GetReg(sMI, 0, SecondReg))
+    return false;
+
+  if (FirstOff != SecondOff - 4)
+    return false;
+  return ConsecutiveRegisters(FirstReg, SecondReg);
+}
+
+// Returns true if at least one register operand of MI is reg1 or reg2.
+// Operands that are not registers are skipped. The operand count is kept in
+// an unsigned: an 8-bit counter would be truncated for instructions with
+// more than 255 operands and part of them would never be looked at.
+static bool InstrUsesRegs(MachineInstr *MI, unsigned reg1, unsigned reg2) {
+  unsigned reg;
+  for (unsigned i = 0, numOp = MI->getNumOperands(); i != numOp; ++i) {
+    if (!GetReg(MI, i, reg))
+      continue;
+    if ((reg == reg1) || (reg == reg2))
+      return true;
+  }
+  return false;
+}
+
+// Inserts every register operand of MI into the set RegistersUsed.
+// The operand count is kept in an unsigned: an 8-bit counter would be
+// truncated for instructions with more than 255 operands.
+static void AddRegisters(MachineInstr *MI,
+                         SmallSet<unsigned, 32> &RegistersUsed) {
+  unsigned reg;
+  for (unsigned i = 0, numOp = MI->getNumOperands(); i != numOp; ++i)
+    if (GetReg(MI, i, reg))
+      RegistersUsed.insert(reg);
+}
+
+// Extracts what is needed from MI to fold it into a lwm/swm instruction.
+// lwm selects the kind that is expected: lw when true, sw when false.
+// On success returns true with
+//   offset - the immediate in operand 2
+//   reg0   - the register in operand 0
+//   base   - the register in operand 1
+// On failure returns false; earlier outputs may already be written.
+static bool getLwmSwmOperands(MachineInstr *MI, int64_t &offset, unsigned &reg0,
+                              unsigned &base, bool lwm,
+                              const ReduceEntry &Entry) {
+  const bool KindMatches = lwm ? isLw(MI) : isSw(MI);
+  if (!KindMatches)
+    return false;
+
+  // Entry's range check runs only after operand 2 was read as an immediate.
+  if (!GetImm(MI, 2, offset) || !ImmInRange(MI, Entry))
+    return false;
+
+  // Both register reads must succeed, operand 0 first.
+  if (!GetReg(MI, 0, reg0))
+    return false;
+  return GetReg(MI, 1, base);
+}
+
+// Decides whether the 16-bit form of lwm/swm can be generated.
+// MI supplies the base register and the offset; operands is the register
+// list of the instruction being built. All of the following must hold:
+//  - the base (operand 1 of MI) is SP
+//  - the offset (operand 2 of MI) is an immediate that is a multiple of 4
+//    with a quotient in [0, 16)
+//  - the list holds between 2 and 5 registers
+//  - the last register of the list is RA
+static bool LwmSwm16Bit(MachineInstr *MI,
+                        const std::vector<MachineOperand> &operands) {
+  if (!IsSP(MI->getOperand(1)))
+    return false;
+
+  int64_t Off;
+  if (!GetImm(MI, 2, Off) || !InRange(Off, 2, 0, 16))
+    return false;
+
+  const unsigned Count = operands.size();
+  if (Count < 2 || Count > 5)
+    return false;
+
+  // The list has to end with RA.
+  const MachineOperand &Last = operands[Count - 1];
+  return Last.getReg() == Mips::RA;
+}
+
+// Looks reg up in the sequence S0-S7, FP, RA.
+// On success stores its zero-based index in Position and returns true;
+// otherwise returns false and leaves Position untouched.
+static bool findLWMSWMPositon(unsigned reg, unsigned &Position) {
+  SmallVector<unsigned, 10> Slots = {Mips::S0, Mips::S1, Mips::S2, Mips::S3,
+                                     Mips::S4, Mips::S5, Mips::S6, Mips::S7,
+                                     Mips::FP, Mips::RA};
+  auto Hit = std::find(Slots.begin(), Slots.end(), reg);
+  if (Hit == Slots.end())
+    return false;
+
+  Position = static_cast<unsigned>(Hit - Slots.begin());
+  return true;
+}
+
+MicroMips32SizeReduce::MicroMips32SizeReduce() : MachineFunctionPass(ID) {
+  std::sort(std::begin(ReduceTable), std::end(ReduceTable)); // for ReduceMI
+}
+
+bool MicroMips32SizeReduce::ReduceMI(
+    MachineBasicBlock &MBB, const MachineBasicBlock::instr_iterator &MII,
+    const MachineBasicBlock::instr_iterator &E,
+    MachineBasicBlock::instr_iterator &NNextMII) {
+  typedef std::vector<ReduceEntry>::const_iterator EntryIter;
+
+  // Search the table. It is sorted by wide opcode, so every entry for the
+  // opcode of this instruction lies in one contiguous range.
+  const unsigned Opcode = MII->getOpcode();
+  const std::pair<EntryIter, EntryIter> Candidates =
+      std::equal_range(ReduceTable.cbegin(), ReduceTable.cend(), Opcode);
+
+  // Hand the instruction to the reduce function of each candidate, in table
+  // order, and stop at the first one that reports success. The arguments
+  // travel in a ReduceEntryFA because the reduce functions take a void *.
+  // An empty range skips the loop and falls through to the failure return.
+  EntryIter It = Candidates.first;
+  while (It != Candidates.second) {
+    ReduceEntryFA Args(MBB, MII, E, NNextMII, *It);
+    if (It->ReduceFunction(&Args))
+      return true;
+    ++It;
+  }
+
+  // No entry for this opcode, or none of them applied.
+  return false;
+}
+
+bool MicroMips32SizeReduce::ReduceMIToLwpSwp(void *v) {
+  // Pairs the lw/sw at MII with a later matching lw/sw into one lwp/swp.
+  ReduceEntryFA &fa = *static_cast<ReduceEntryFA *>(v);
+  MachineBasicBlock &MBB = fa.MBB;
+  const MachineBasicBlock::instr_iterator &MII = fa.MII;
+  const MachineBasicBlock::instr_iterator &E = fa.E;
+  MachineBasicBlock::instr_iterator &NNextMII = fa.NNextMII;
+  const ReduceEntry &Entry = fa.Entry;
+
+  MachineBasicBlock::instr_iterator NextMII;
+
+  // First instruction
+  MachineInstr *fMI = &*MII;
+  // Second instruction
+  MachineInstr *sMI = nullptr;
+
+  bool lwp = isLw(fMI);   // lwp==true -> transform to lwp instruction
+  if (!lwp && !isSw(fMI)) // lwp==false && isSw -> transform to swp instruction
+    return false;
+
+  if (!CheckLwpSwpInstr(fMI, lwp, Entry))
+    return false;
+
+  bool found = false;
+  bool consecutiveForward = false;
+  bool consecutiveBackward = false;
+
+  unsigned reg1, reg2;
+  if (!GetReg(fMI, 0, reg1))
+    return false;
+  if (!GetReg(fMI, 1, reg2)) // equal to GetReg(sMI,1,reg2)
+    return false;
+
+  SmallSet<unsigned, 32> RegistersUsed;
+
+  // Iterate through block to find second instruction
+  MachineBasicBlock::instr_iterator iMII = std::next(MII);
+  for (; iMII != E; iMII = NextMII) {
+    // The last instruction before E is never examined as a partner.
+    NextMII = std::next(iMII);
+    if (NextMII == E)
+      break;
+
+    sMI = &*iMII;
+
+    if (CheckLwpSwpInstr(sMI, lwp, Entry)) {
+      unsigned reg;
+      if (GetReg(sMI, 1, reg) && (reg2 == reg)) {
+        consecutiveForward = consecutiveInstr(fMI, sMI);
+        consecutiveBackward = consecutiveInstr(sMI, fMI);
+        found = consecutiveForward || consecutiveBackward;
+      }
+    }
+
+    if (found)
+      break;
+
+    // The merged instruction is emitted at the second one, so the first
+    // access moves across sMI. Give up if sMI may write memory, if a store
+    // would move across a load, or if sMI is a call or has side effects
+    // that are not modelled.
+    if (sMI->mayStore() || (!lwp && sMI->mayLoad()))
+      return false;
+    if (sMI->hasUnmodeledSideEffects() || NotAppropriateInstruction(sMI))
+      return false;
+    if (InstrUsesRegs(sMI, reg1, reg2))
+      return false;
+
+    // memorize registers used by intermediate instructions
+    AddRegisters(sMI, RegistersUsed);
+  }
+
+  if (!found)
+    return false;
+
+  unsigned reg;
+  if (!GetReg(sMI, 0, reg))
+    return false;
+
+  // If some intermediate instruction uses reg,
+  // then reduction is not possible
+  if (RegistersUsed.count(reg))
+    return false;
+
+  NNextMII = std::next(iMII);
+  return ReplaceInstruction(MBB, fMI, sMI, consecutiveForward, Entry);
+}
+
+bool MicroMips32SizeReduce::ReduceLWtoLWSP(void *v) {
+  // v carries the ReduceEntryFA describing the candidate instruction.
+  ReduceEntryFA *Args = static_cast<ReduceEntryFA *>(v);
+  MachineInstr *Load = &*Args->MII;
+
+  // The narrow form needs an in-range immediate and SP as operand 1.
+  // The immediate is checked first, the base register second.
+  const bool Reducible =
+      ImmInRange(Load, Args->Entry) && IsSP(Load->getOperand(1));
+  if (!Reducible)
+    return false;
+
+  return ReplaceInstruction(Args->MBB, Load, Args->Entry);
+}
+
+bool MicroMips32SizeReduce::ReduceSWtoSW16(void *v) {
+  // v carries the ReduceEntryFA describing the candidate instruction.
+  ReduceEntryFA *Args = static_cast<ReduceEntryFA *>(v);
+  MachineInstr *Store = &*Args->MII;
+
+  if (!ImmInRange(Store, Args->Entry))
+    return false;
+
+  // Operand 0 has to be $0, $17 or $2-$7 and operand 1 has to be
+  // $16, $17 or $2-$7.
+  if (!isMMSourceRegister(Store->getOperand(0)) ||
+      !isMMThreeBitGPRegister(Store->getOperand(1)))
+    return false;
+  return ReplaceInstruction(Args->MBB, Store, Args->Entry);
+}
+
+bool MicroMips32SizeReduce::ReduceSWtoSWSP(void *v) {
+  // v carries the ReduceEntryFA describing the candidate instruction.
+  ReduceEntryFA *Args = static_cast<ReduceEntryFA *>(v);
+  MachineInstr *Store = &*Args->MII;
+
+  // The narrow form needs an in-range immediate and SP as operand 1.
+  // The immediate is checked first, the base register second.
+  if (!ImmInRange(Store, Args->Entry))
+    return false;
+  if (!IsSP(Store->getOperand(1)))
+    return false;
+
+  return ReplaceInstruction(Args->MBB, Store, Args->Entry);
+}
+
+bool MicroMips32SizeReduce::ReduceLoadStore(void *v) {
+  // v carries the ReduceEntryFA describing the candidate instruction.
+  ReduceEntryFA *Args = static_cast<ReduceEntryFA *>(v);
+  MachineInstr *MI = &*Args->MII;
+  const ReduceEntry &Entry = Args->Entry;
+
+  if (!ImmInRange(MI, Entry))
+    return false;
+
+  // Only the lwm/swm entries have further requirements.
+  const unsigned Wide = Entry.WideOpc();
+  const bool IsStoreMultiple = Wide == Mips::SWM32_MM || Wide == Mips::SWM_MM;
+  const bool IsLoadMultiple = Wide == Mips::LWM32_MM || Wide == Mips::LWM_MM;
+  if (IsStoreMultiple || IsLoadMultiple) {
+    // Operand 1 has to be SP.
+    if (!IsSP(MI->getOperand(1)))
+      return false;
+
+    // Operand 0 has to be an immediate with a value in [17, 22);
+    // presumably this is the register list encoding - confirm.
+    int64_t RegList;
+    if (!GetImm(MI, 0, RegList) || !InRange(RegList, 0, 17, 22))
+      return false;
+  }
+
+  return ReplaceInstruction(Args->MBB, MI, Entry);
+}
+
+void MicroMips32SizeReduce::DeleteInstructions(
+    MachineBasicBlock &MBB, const SmallVector<struct lwmswm, 10> &instrs) {
+  // Erase every slot still marked as found; unmarked slots stay in the block.
+  for (const struct lwmswm &Slot : instrs)
+    if (Slot.found)
+      MBB.erase(Slot.MI);
+}
+
+bool MicroMips32SizeReduce::ReduceMIToLWMSWM(void *v) {
+  ReduceEntryFA fa = *(ReduceEntryFA *)v; // ReduceMI passes a ReduceEntryFA
+  MachineBasicBlock &MBB = fa.MBB;
+  const MachineBasicBlock::instr_iterator &MII = fa.MII;
+  const MachineBasicBlock::instr_iterator &E = fa.E;
+  MachineBasicBlock::instr_iterator &NextMII = fa.NNextMII;
+  const ReduceEntry &Entry = fa.Entry;
+
+  MachineBasicBlock::instr_iterator nMII;
+  MachineBasicBlock::instr_iterator iMII = MII;
+  MachineBasicBlock::instr_iterator startingMII = MII;
+  MachineInstr *MI = &*MII;
+  MachineInstr *startingMI = &*MII;
+
+  bool lwm = false; // true -> build lwm, false -> build swm
+  if (isLw(MI))
+    lwm = true;
+  else if (!isSw(MI))
+    return false;
+
+  unsigned startingBase;
+  int64_t offset;
+  unsigned reg0, base;
+  if (!getLwmSwmOperands(MI, offset, reg0, startingBase, lwm, Entry))
+    return false;
+
+  unsigned endReg = Mips::RA;
+  // instrs has one slot per register of the sequence S0-S7, FP, RA
+  SmallVector<struct lwmswm, 10> instrs(10);
+
+  unsigned position;
+  if (findLWMSWMPositon(reg0, position)) {
+    instrs[position].found = true;
+    instrs[position].MI = MI;
+  } else
+    return false;
+  // startingOffset is the offset that slot 0 has or would have; b tells
+  // whether it has been computed yet (RA is not used to compute it)
+  int64_t startingOffset = 0;
+  bool b = false;
+  if (reg0 != endReg) {
+    startingOffset = offset - position * 4;
+    b = true;
+  }
+  // A load that overwrites its own base register is not merged
+  if (lwm) {
+    if (startingBase == reg0)
+      return false;
+  }
+  // Collect the directly following lw/sw instructions that fit
+  for (iMII = std::next(startingMII); iMII != E; iMII = nMII) {
+    MachineInstr *MI = &*iMII;
+    nMII = std::next(iMII);
+
+    if (!getLwmSwmOperands(MI, offset, reg0, base, lwm, Entry))
+      break;
+
+    if (startingBase != base)
+      break;
+
+    if (!findLWMSWMPositon(reg0, position))
+      break;
+
+    if (lwm) {
+      if (base == reg0)
+        return false;
+    }
+
+    if (instrs[position].found) // slot already taken
+      break;
+
+    if (!b) {
+      if (reg0 != endReg) {
+        startingOffset = offset - position * 4;
+        b = true;
+      }
+    }
+
+    if ((reg0 != endReg) && (startingOffset != offset - position * 4))
+      break;
+
+    instrs[position].found = true;
+    instrs[position].MI = MI;
+  }
+  // Slot 0 (S0) is mandatory
+  unsigned i;
+  if (!(instrs[0].found))
+    return false;
+  // num = length of the run of found slots that starts at slot 0
+  for (i = 1; i < instrs.size(); i++)
+    if (!(instrs[i].found))
+      break;
+
+  unsigned num = i;
+  // Unmark found slots behind the first gap, except the last slot (RA)
+  for (; i < (instrs.size() - 1); i++)
+    if (instrs[i].found)
+      instrs[i].found = false;
+  // RA is kept only if its offset directly continues the run
+  if (instrs[9].found) {
+    getLwmSwmOperands(instrs[9].MI, offset, reg0, base, lwm, Entry);
+
+    if (num != instrs.size()) {
+      if (startingOffset != offset - num * 4)
+        instrs[9].found = false;
+      else
+        num++;
+    } else if (startingOffset != (offset - (num - 1) * 4))
+      instrs[9].found = false;
+  }
+
+  if (num == 1) // a single instruction is not worth a lwm/swm
+    return false;
+  // Emit the merged instruction, then erase the instructions it replaces
+  AddInstructionLWMSWM(MBB, startingMI, startingOffset, instrs, lwm);
+  NextMII = nMII;
+  DeleteInstructions(MBB, instrs);
+
+  return true;
+}
+
+bool MicroMips32SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
+  // Becomes true as soon as any sweep rewrote something.
+  bool Modified = false;
+
+  // Sweep over the whole block again and again until one complete sweep
+  // leaves it unchanged. A successful reduction can make further
+  // reductions possible, hence the repetition.
+  for (bool Changed = true; Changed;) {
+    Changed = false;
+
+    // The end iterator is read once per sweep.
+    const MachineBasicBlock::instr_iterator End = MBB.instr_end();
+    MachineBasicBlock::instr_iterator Cur = MBB.instr_begin();
+
+    while (Cur != End) {
+      // By default continue with the direct successor. ReduceMI receives
+      // Resume by reference and may redirect it, e.g. past instructions
+      // that it erased.
+      MachineBasicBlock::instr_iterator Resume = std::next(Cur);
+
+      // Bundles and debug values are never candidates.
+      const bool Skip = Cur->isBundle() || Cur->isDebugValue();
+
+      // Try to reduce several instructions into one instruction
+      // Try to reduce two instructions into one instruction
+      // Try to reduce 32-bit instruction into 16-bit instruction
+      if (!Skip && ReduceMI(MBB, Cur, End, Resume)) {
+        Modified = true;
+        Changed = true;
+      }
+
+      Cur = Resume;
+    }
+  }
+
+  return Modified;
+}
+
+bool MicroMips32SizeReduce::AddInstructionLWMSWM(
+    MachineBasicBlock &MBB, MachineInstr *MI, int64_t offset,
+    const SmallVector<struct lwmswm, 10> &instrs, bool lwm) {
+  // Builds one lwm/swm instruction in front of MI.
+  //   MI     - insertion point; also supplies the base register (operand 1)
+  //            and the debug location
+  //   offset - immediate of the new instruction
+  //   instrs - slots whose found entries supply the register list
+  //   lwm    - true for a load multiple, false for a store multiple
+  // Always returns true. Nothing is erased here.
+  std::vector<MachineOperand> RegList;
+
+  // Take operand 0 of every slot in the leading run of found slots.
+  unsigned Slot = 0;
+  while (Slot < instrs.size() && instrs[Slot].found) {
+    RegList.push_back(instrs[Slot].MI->getOperand(0));
+    ++Slot;
+  }
+
+  // If the run stopped early, the last slot (index 9) is added when found.
+  if (Slot < instrs.size() && instrs[9].found)
+    RegList.push_back(instrs[9].MI->getOperand(0));
+
+  // Check if 16-bit lwm/swm instruction can be generated
+  const bool Narrow = LwmSwm16Bit(instrs[0].MI, RegList);
+
+  unsigned NewOpc;
+  if (lwm)
+    NewOpc = Narrow ? Mips::LWM16_MM : Mips::LWM32_MM;
+  else
+    NewOpc = Narrow ? Mips::SWM16_MM : Mips::SWM32_MM;
+
+  DebugLoc DL = MI->getDebugLoc();
+  MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, MipsII->get(NewOpc));
+
+  // Operand order: register list, base register, offset.
+  for (const MachineOperand &MO : RegList)
+    MIB.addOperand(MO);
+  MIB.addOperand(MI->getOperand(1));
+  MIB.addImm(offset);
+
+  DEBUG(errs() << "Converted to: " << *MIB);
+  ++NumLwmSwm;
+  return true;
+}
+
+bool MicroMips32SizeReduce::ReplaceInstruction(MachineBasicBlock &MBB,
+                                               MachineInstr *fMI,
+                                               MachineInstr *sMI, bool flag,
+                                               const ReduceEntry &Entry) {
+  // Replaces the pair fMI/sMI by one instruction with Entry's narrow opcode,
+  // inserted in front of sMI. flag is true when fMI is the half with the
+  // lower offset. Returns false, without touching MBB, when Entry does not
+  // describe a lwp/swp operand layout.
+  // The layout is validated before BuildMI: bailing out afterwards would
+  // leave an instruction without operands in the block.
+  if (Entry.TransferOperands() != opLwpSwp)
+    return false;
+
+  const MCInstrDesc &NewMCID = MipsII->get(Entry.NarrowOpc());
+  DebugLoc dl = sMI->getDebugLoc();
+  MachineInstrBuilder MIB = BuildMI(MBB, sMI, dl, NewMCID);
+
+  // Operands: low register, high register, base and offset of the low half.
+  MachineInstr *LowMI = flag ? fMI : sMI;
+  MachineInstr *HighMI = flag ? sMI : fMI;
+  MIB.addOperand(LowMI->getOperand(0));
+  MIB.addOperand(HighMI->getOperand(0));
+  MIB.addOperand(LowMI->getOperand(1));
+  MIB.addOperand(LowMI->getOperand(2));
+
+  DEBUG(errs() << "\nConverted " << *fMI << "and " << *sMI
+               << "       to 16-bit: " << *MIB);
+
+  MBB.erase_instr(fMI);
+  MBB.erase_instr(sMI);
+  ++NumTwoOne;
+  return true;
+}
+
+bool MicroMips32SizeReduce::ReplaceInstruction(MachineBasicBlock &MBB,
+                                               MachineInstr *MI,
+                                               const ReduceEntry &Entry) {
+  // Replaces MI by an instruction with Entry's narrow opcode, inserted in
+  // front of MI, and transfers the operands selected by Entry. Returns
+  // false, without touching MBB, for an operand layout that is not handled.
+  // The layout is validated before BuildMI so that a failure never leaves
+  // an instruction without operands in the block.
+  const enum opNum opNums = Entry.TransferOperands();
+  const bool Supported = opNums == op02 || opNums == op01 || opNums == op12 ||
+                         opNums == op2 || opNums == opAll;
+  if (!Supported)
+    return false;
+
+  // Add the 16-bit instruction.
+  const MCInstrDesc &NewMCID = MipsII->get(Entry.NarrowOpc());
+  DebugLoc dl = MI->getDebugLoc();
+  MachineInstrBuilder MIB = BuildMI(MBB, MI, dl, NewMCID);
+
+  if (opNums == op02) {
+    MIB.addOperand(MI->getOperand(0));
+    MIB.addOperand(MI->getOperand(2));
+  } else if (opNums == op01) {
+    MIB.addOperand(MI->getOperand(0));
+    MIB.addOperand(MI->getOperand(1));
+  } else if (opNums == op12) {
+    MIB.addOperand(MI->getOperand(1));
+    MIB.addOperand(MI->getOperand(2));
+  } else if (opNums == op2) {
+    MIB.addOperand(MI->getOperand(2));
+  } else if (Entry.SmallerNumRegs()) {
+    // opAll with fewer registers: operands 1 and 2 trade places when
+    // operands 0 and 1 are the same register.
+    const bool Swap = EqualRegsInInstr(MI, 0, 1);
+    MIB.addOperand(MI->getOperand(0));
+    MIB.addOperand(MI->getOperand(Swap ? 2 : 1));
+    MIB.addOperand(MI->getOperand(Swap ? 1 : 2));
+  } else {
+    // opAll: copy every operand. The counter is an unsigned so that the
+    // operand count is not truncated to 8 bits.
+    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
+      MIB.addOperand(MI->getOperand(i));
+  }
+  // Transfer memoperands and MI flags.
+  MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+  MIB.setMIFlags(MI->getFlags());
+  DEBUG(errs() << "Converted 32-bit: " << *MI << "       to 16-bit: " << *MIB);
+  MBB.erase_instr(MI);
+  ++NumReduced;
+  return true;
+}
+
+bool MicroMips32SizeReduce::runOnMachineFunction(MachineFunction &MF) {
+  Subtarget = &static_cast<const MipsSubtarget &>(MF.getSubtarget());
+
+  // Nothing to do unless the subtarget is in microMIPS mode.
+  if (!Subtarget->inMicroMipsMode())
+    return false;
+
+  MipsII = static_cast<const MipsInstrInfo *>(Subtarget->getInstrInfo());
+
+  // Reduce every basic block; the function changed if any block did.
+  bool Changed = false;
+  for (MachineBasicBlock &Block : MF)
+    Changed |= ReduceMBB(Block);
+
+  return Changed;
+}
+
+/// createMicroMips32SizeReductionPass - Returns a new instance of the
+/// MicroMips32 size reduction pass.
+FunctionPass *llvm::createMicroMips32SizeReductionPass() {
+  return new MicroMips32SizeReduce();
+}
Index: lib/Target/Mips/MicroMipsInstrInfo.td
===================================================================
--- lib/Target/Mips/MicroMipsInstrInfo.td
+++ lib/Target/Mips/MicroMipsInstrInfo.td
@@ -526,7 +526,7 @@
 
 class StoreMultMM<string opstr,
             InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
-  InstSE<(outs), (ins reglist:$rt, mem_mm_12:$addr),
+  InstSE<(outs), (ins reglist:$rt, variable_ops, mem_mm_12:$addr),
          !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
   let DecoderMethod = "DecodeMemMMImm12";
   let mayStore = 1;
@@ -534,7 +534,7 @@
 
 class LoadMultMM<string opstr,
             InstrItinClass Itin = NoItinerary, ComplexPattern Addr = addr> :
-  InstSE<(outs reglist:$rt), (ins mem_mm_12:$addr),
+  InstSE<(outs reglist:$rt, variable_ops), (ins mem_mm_12:$addr),
           !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
   let DecoderMethod = "DecodeMemMMImm12";
   let mayLoad = 1;
@@ -543,7 +543,7 @@
 class StoreMultMM16<string opstr,
                     InstrItinClass Itin = NoItinerary,
                     ComplexPattern Addr = addr> :
-  MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr),
+  MicroMipsInst16<(outs), (ins reglist16:$rt, variable_ops, mem_mm_4sp:$addr),
                   !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
   let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
   let mayStore = 1;
@@ -552,7 +552,7 @@
 class LoadMultMM16<string opstr,
                    InstrItinClass Itin = NoItinerary,
                    ComplexPattern Addr = addr> :
-  MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr),
+  MicroMipsInst16<(outs reglist16:$rt, variable_ops), (ins mem_mm_4sp:$addr),
                   !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI> {
   let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
   let mayLoad = 1;
Index: lib/Target/Mips/Mips.h
===================================================================
--- lib/Target/Mips/Mips.h
+++ lib/Target/Mips/Mips.h
@@ -32,6 +32,7 @@
   FunctionPass *createMipsHazardSchedule();
   FunctionPass *createMipsLongBranchPass(MipsTargetMachine &TM);
   FunctionPass *createMipsConstantIslandPass(MipsTargetMachine &tm);
+  FunctionPass *createMicroMips32SizeReductionPass();
 } // end namespace llvm;
 
 #endif
Index: lib/Target/Mips/MipsTargetMachine.cpp
===================================================================
--- lib/Target/Mips/MipsTargetMachine.cpp
+++ lib/Target/Mips/MipsTargetMachine.cpp
@@ -250,6 +250,7 @@
 // print out the code after the passes.
 void MipsPassConfig::addPreEmitPass() {
   MipsTargetMachine &TM = getMipsTargetMachine();
+  addPass(createMicroMips32SizeReductionPass());
 
   // The delay slot filler pass can potientially create forbidden slot (FS)
   // hazards for MIPSR6 which the hazard schedule pass (HSP) will fix. Any
Index: test/CodeGen/Mips/micromips-lwm-swm-lwp-swp-sw16.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Mips/micromips-lwm-swm-lwp-swp-sw16.ll
@@ -0,0 +1,97 @@
+; RUN: llc -march=mipsel -mattr=+micromips < %s | FileCheck %s
+
+define void @ell_3m_mul_d(double* %m3, double* %m1, double* %m2) #0 {
+entry:
+; CHECK: swm16
+; CHECK: lwp
+; CHECK: lwm16
+  %arrayidx = getelementptr double, double* %m1, i32 5
+  %0 = load double, double* %arrayidx, align 8
+  %arrayidx1 = getelementptr double, double* %m2, i32 1
+  %1 = load double, double* %arrayidx1, align 8
+  %mul = fmul double %0, %1
+  %arrayidx2 = getelementptr double, double* %m1, i32 8
+  %2 = load double, double* %arrayidx2, align 8
+  %arrayidx3 = getelementptr double, double* %m2, i32 2
+  %3 = load double, double* %arrayidx3, align 8
+  %mul4 = fmul double %2, %3
+  %add = fadd double %mul, %mul4
+  %arrayidx5 = getelementptr double, double* %m3, i32 2
+  store double %add, double* %arrayidx5, align 8
+  ret void
+}
+
+define void @ell_4m_inv_d(double* %i, double* %m) #0 {
+entry:
+; CHECK: swm32
+; CHECK: lwp
+; CHECK: lwp
+; CHECK: lwp
+; CHECK: lwp
+; CHECK: lwm32
+  %0 = load double, double* %m, align 8
+  %arrayidx1 = getelementptr double, double* %m, i32 5
+  %1 = load double, double* %arrayidx1, align 8
+  %arrayidx2 = getelementptr double, double* %m, i32 10
+  %2 = load double, double* %arrayidx2, align 8
+  %mul = fmul double %1, %2
+  %arrayidx3 = getelementptr double, double* %m, i32 15
+  %3 = load double, double* %arrayidx3, align 8
+  %mul4 = fmul double %mul, %3
+  %mul5 = fmul double %0, %mul4
+  %arrayidx11 = getelementptr double, double* %m, i32 9
+  %4 = load double, double* %arrayidx11, align 8
+  %arrayidx12 = getelementptr double, double* %m, i32 14
+  %5 = load double, double* %arrayidx12, align 8
+  %mul13 = fmul double %4, %5
+  %add = fadd double %mul4, %mul13
+  %div = fdiv double %add, %mul5
+  store double %div, double* %i, align 8
+  ret void
+}
+
+define i32 @sw16(i32* nocapture %r) #0 {
+entry: ; The pattern includes operands because a bare "sw16" matches this symbol.
+; CHECK: sw16 ${{[a-z0-9]+}}, 0($4)
+  store i32 0, i32* %r, align 4
+  ret i32 0
+}
+
+%union.expr_rec = type { %struct.constant_rec }
+%struct.constant_rec = type { i32, %union.Type_Rec*, i32, i32, i32, [4 x i8*], i32, i32, [4 x %union.scalar_constant_rec] }
+%union.Type_Rec = type { %struct.TypeStruct_Rec }
+%struct.TypeStruct_Rec = type { i32, i32, %union.Type_Rec*, %struct.Scope_Rec*, %struct.SourceLoc_Rec, i32, i32, i32, i32, i8*, i32, i8* }
+%struct.Scope_Rec = type { %struct.Scope_Rec*, %struct.Scope_Rec*, %struct.Scope_Rec*, %struct.Scope_Rec*, %struct.MemoryPool_rec*, %struct.Symbol_Rec*, %struct.Symbol_Rec*, %struct.Symbol_Rec*, %union.Type_Rec*, i32, i32, i32, i32, i32, i32, i32, i32, %struct.SymbolList_Rec*, %union.stmt_rec* }
+%struct.MemoryPool_rec = type opaque
+%struct.Symbol_Rec = type { %struct.Symbol_Rec*, %struct.Symbol_Rec*, %struct.Symbol_Rec*, i32, %union.Type_Rec*, %struct.SourceLoc_Rec, i32, i32, i32, i32, i8*, i8*, %union.anon }
+%union.anon = type { %struct.FunSymbol_Rec }
+%struct.FunSymbol_Rec = type { %struct.Scope_Rec*, %struct.Symbol_Rec*, %union.stmt_rec*, %struct.Symbol_Rec*, i32, i16, i16, i8 }
+%struct.SymbolList_Rec = type { %struct.SymbolList_Rec*, %struct.Symbol_Rec* }
+%union.stmt_rec = type { %struct.for_stmt_rec }
+%struct.for_stmt_rec = type { i32, %union.stmt_rec*, %struct.SourceLoc_Rec, %union.stmt_rec*, %union.expr_rec*, %union.stmt_rec*, %union.stmt_rec* }
+%struct.SourceLoc_Rec = type { i16, i16 }
+%union.scalar_constant_rec = type { float }
+%struct.binary_rec = type { i32, %union.Type_Rec*, i32, i32, i32, [4 x i8*], i32, i32, %union.expr_rec*, %union.expr_rec* }
+
+; Function Attrs: nounwind
+define %union.expr_rec* @GenVAssign(%union.expr_rec* %fVar, %union.expr_rec* %fExpr, i32 signext %base, i32 signext %len) {
+entry:
+; CHECK: swp
+; CHECK: lwp
+; CHECK: lwm16
+  %and = shl i32 %len, 4
+  %shl = and i32 %and, 240
+  %and1 = and i32 %base, 15
+  %or = or i32 %shl, %and1
+  %call = tail call %struct.binary_rec* @NewBinopSubNode(i32 signext 106, i32 signext %or, %union.expr_rec* %fVar, %union.expr_rec* %fExpr)
+  %0 = bitcast %struct.binary_rec* %call to %union.expr_rec*
+  %call2 = tail call %union.Type_Rec* @GetStandardType(i32 signext %base, i32 signext %len, i32 signext 0)
+  %type = getelementptr inbounds %struct.binary_rec, %struct.binary_rec* %call, i32 0, i32 1
+  store %union.Type_Rec* %call2, %union.Type_Rec** %type, align 4
+  ret %union.expr_rec* %0
+}
+
+declare %struct.binary_rec* @NewBinopSubNode(i32 signext, i32 signext, %union.expr_rec*, %union.expr_rec*)
+declare %union.Type_Rec* @GetStandardType(i32 signext, i32 signext, i32 signext)
+
+attributes #0 = { nounwind "use-soft-float"="true"  }
Index: test/CodeGen/Mips/micromips-lwsp-swsp.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Mips/micromips-lwsp-swsp.ll
@@ -0,0 +1,61 @@
+; RUN: llc -march=mipsel -mattr=+micromips -filetype=asm -asm-show-inst < %s | FileCheck %s
+
+%struct.inflate_blocks_state = type { i32, %union.anon, i32, i32, i32, %struct.inflate_huft_s*, i8*, i8*, i8*, i8*, i32 (i32, i8*, i32)*, i32 }
+%union.anon = type { %struct.anon }
+%struct.anon = type { i32, i32, i32*, i32, %struct.inflate_huft_s* }
+%struct.inflate_huft_s = type { %union.anon.0, i32 }
+%union.anon.0 = type { i32 }
+%struct.z_stream_s = type { i8*, i32, i32, i8*, i32, i32, i8*, %struct.internal_state*, i8* (i8*, i32, i32)*, void (i8*, i8*)*, i8*, i32, i32, i32 }
+%struct.internal_state = type { i32 }
+
+@inflate_mask = global [17 x i32] [i32 0, i32 1, i32 3, i32 7, i32 15, i32 31, i32 63, i32 127, i32 255, i32 511, i32 1023, i32 2047, i32 4095, i32 8191, i32 16383, i32 32767, i32 65535], align 4
+
+; Function Attrs: nounwind
+define i32 @inflate_flush(%struct.inflate_blocks_state* nocapture %s, %struct.z_stream_s* nocapture %z, i32 signext %r) {
+entry:
+; CHECK: SWSP_MM
+; CHECK: SWSP_MM
+; CHECK: SWSP_MM
+; CHECK: LWSP_MM
+; CHECK: LWSP_MM
+; CHECK: LWSP_MM
+  %next_out = getelementptr inbounds %struct.z_stream_s, %struct.z_stream_s* %z, i32 0, i32 3
+  %0 = load i8*, i8** %next_out, align 4
+  %read = getelementptr inbounds %struct.inflate_blocks_state, %struct.inflate_blocks_state* %s, i32 0, i32 8
+  %1 = load i8*, i8** %read, align 4
+  %write = getelementptr inbounds %struct.inflate_blocks_state, %struct.inflate_blocks_state* %s, i32 0, i32 9
+  %2 = load i8*, i8** %write, align 4
+  %cmp = icmp ugt i8* %1, %2
+  br label %cond.end
+
+cond.end:                                         ; preds = %entry
+  %sub.ptr.lhs.cast = ptrtoint i8* %2 to i32
+  %sub.ptr.rhs.cast = ptrtoint i8* %1 to i32
+  %sub.ptr.sub = sub i32 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+  %avail_out = getelementptr inbounds %struct.z_stream_s, %struct.z_stream_s* %z, i32 0, i32 4
+  %3 = load i32, i32* %avail_out, align 4
+  %cmp2 = icmp ugt i32 %sub.ptr.sub, %3
+  %.sub.ptr.sub = select i1 %cmp2, i32 %3, i32 %sub.ptr.sub
+  %sub = sub i32 %3, %.sub.ptr.sub
+  store i32 %sub, i32* %avail_out, align 4
+  %total_out = getelementptr inbounds %struct.z_stream_s, %struct.z_stream_s* %z, i32 0, i32 5
+  %4 = load i32, i32* %total_out, align 4
+  %checkfn = getelementptr inbounds %struct.inflate_blocks_state, %struct.inflate_blocks_state* %s, i32 0, i32 10
+  %5 = load i32 (i32, i8*, i32)*, i32 (i32, i8*, i32)** %checkfn, align 4
+  %cmp8 = icmp eq i32 (i32, i8*, i32)* %5, null
+  br i1 %cmp8, label %if.end.12, label %if.then.9
+
+if.then.9:                                        ; preds = %cond.end
+  %check = getelementptr inbounds %struct.inflate_blocks_state, %struct.inflate_blocks_state* %s, i32 0, i32 11
+  %6 = load i32, i32* %check, align 4
+  %call = tail call i32 %5(i32 signext %6, i8* %1, i32 signext %.sub.ptr.sub)
+  store i32 %call, i32* %check, align 4
+  br label %if.end.12
+
+if.end.12:                                        ; preds = %cond.end, %if.then.9
+  tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %0, i8* %1, i32 %.sub.ptr.sub, i32 1, i1 false)
+  ret i32 0
+}
+
+; Function Attrs: nounwind argmemonly
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)