Index: llvm/lib/Target/RISCV/CMakeLists.txt
===================================================================
--- llvm/lib/Target/RISCV/CMakeLists.txt
+++ llvm/lib/Target/RISCV/CMakeLists.txt
@@ -22,6 +22,7 @@
   RISCVAsmPrinter.cpp
   RISCVCallLowering.cpp
   RISCVExpandAtomicPseudoInsts.cpp
+  RISCVCompressInstrs.cpp
   RISCVExpandPseudoInsts.cpp
   RISCVFrameLowering.cpp
   RISCVInstrInfo.cpp
Index: llvm/lib/Target/RISCV/RISCV.h
===================================================================
--- llvm/lib/Target/RISCV/RISCV.h
+++ llvm/lib/Target/RISCV/RISCV.h
@@ -37,6 +37,9 @@
 
 FunctionPass *createRISCVISelDag(RISCVTargetMachine &TM);
 
+FunctionPass *createRISCVCompressInstrsOptPass();
+void initializeRISCVCompressInstrsOptPass(PassRegistry &);
+
 FunctionPass *createRISCVMergeBaseOffsetOptPass();
 void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
 
Index: llvm/lib/Target/RISCV/RISCVCompressInstrs.cpp
===================================================================
--- /dev/null
+++ llvm/lib/Target/RISCV/RISCVCompressInstrs.cpp
@@ -0,0 +1,333 @@
+//===-- RISCVCompressInstrs.cpp - Make instructions compressible ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass searches for instructions that are prevented from being compressed
+// by one of the following:
+//
+//   1. The use of a single uncompressed register.
+//   2. A base register + offset where the offset is too large to be compressed
+//   and the base register may or may not be compressed.
+//
+//
+// For case 1, if a compressed register is available, then the uncompressed
+// register is copied to the compressed register and its uses are replaced.
+//
+// For example, storing zero uses the uncompressible zero register:
+//   sw zero, 0(a0)   # if zero
+//   sw zero, 8(a0)   # if zero
+//   sw zero, 4(a0)   # if zero
+//   sw zero, 24(a0)   # if zero
+//
+// If a compressed register (e.g. a1) is available, the above can be transformed
+// to the following to improve code size:
+//   li a1, 0
+//   c.sw a1, 0(a0)
+//   c.sw a1, 8(a0)
+//   c.sw a1, 4(a0)
+//   c.sw a1, 24(a0)
+//
+//
+// For case 2, if a compressed register is available, then the original base
+// is copied and adjusted such that:
+//
+//   new_base_register = base_register + adjustment
+//   base_register + large_offset = new_base_register + small_offset
+//
+// For example, the following offsets are too large for c.sw:
+//   lui a2, 983065
+//   sw  a1, -236(a2)
+//   sw  a1, -240(a2)
+//   sw  a1, -244(a2)
+//   sw  a1, -248(a2)
+//   sw  a1, -252(a2)
+//   sw  a0, -256(a2)
+//
+// If a compressed register is available (e.g. a3), a new base could be created
+// such that the addresses can accessed with a compressible offset, thus
+// improving code size:
+//   lui a2, 983065
+//   addi  a3, a2, -256
+//   c.sw  a1, 20(a3)
+//   c.sw  a1, 16(a3)
+//   c.sw  a1, 12(a3)
+//   c.sw  a1, 8(a3)
+//   c.sw  a1, 4(a3)
+//   c.sw  a0, 0(a3)
+//
+//
+// This optimization is only applied if there are enough uses of the copied
+// register for code size to be reduced.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-compress-instrs"
+#define RISCV_COMPRESS_INSTRS_NAME "RISCV Compress Instructions"
+
+namespace {
+
+struct RISCVCompressInstrsOpt : public MachineFunctionPass {
+  static char ID;
+
+  bool runOnMachineFunction(MachineFunction &Fn) override;
+
+  RISCVCompressInstrsOpt() : MachineFunctionPass(ID) {}
+
+  StringRef getPassName() const override { return RISCV_COMPRESS_INSTRS_NAME; }
+};
+} // namespace
+
+char RISCVCompressInstrsOpt::ID = 0;
+INITIALIZE_PASS(RISCVCompressInstrsOpt, "riscv-compress-instrs",
+                RISCV_COMPRESS_INSTRS_NAME, false, false)
+
+static const uint8_t CSW_OFFSET_MASK = 0b1111100;
+
+// Given an offset for a load/store, return the adjustment required to the base
+// register such that the address can be accessed with a compressible offset.
+// Return 0 if the offset is already compressible.
+static int64_t getBaseAdjustForCompression(int64_t offset) {
+  if (isShiftedUInt<5, 2>(offset))
+    return 0;
+  // Return the excess bits that do not fit in a compressible offset.
+  return offset & ~(CSW_OFFSET_MASK);
+}
+
+// Find a single register and/or large offset which, if compressible, would
+// allow the given instruction to be compressed.
+//
+// Possible return values:
+//   {Reg, 0}          - Uncompressed Reg needs replacing with a compressed
+//                       register.
+//   {Reg, N}          - Reg needs replacing with a compressed register and
+//                       N needs adding to the new register. (Reg may be
+//                       compressed or uncompressed).
+//   {Register(0), 0}  - No suitable optimization found for this instruction.
+static RegImmPair getRegImmPairPreventingCompression(const MachineInstr &MI) {
+  int64_t NewBaseAdjust = 0;
+
+  switch (MI.getOpcode()) {
+  default:
+    break;
+  case RISCV::LW: {
+    const MachineOperand &MOImm = MI.getOperand(2);
+    if (!MOImm.isImm())
+      break;
+
+    int64_t Offset = MOImm.getImm();
+
+    NewBaseAdjust = getBaseAdjustForCompression(Offset);
+
+    Register Base = MI.getOperand(1).getReg();
+    // Load from stack pointer does not have a requirement for either of the
+    // registers to be compressible and the offset can be a 6 bit immediate
+    // scaled by 4.
+    if (RISCV::SPRegClass.contains(Base)) {
+      if (!isShiftedUInt<6, 2>(Offset) && NewBaseAdjust)
+        return RegImmPair(Base, NewBaseAdjust);
+      break;
+    }
+
+    Register Dest = MI.getOperand(0).getReg();
+    bool DestCompressed = RISCV::GPRCRegClass.contains(Dest);
+    bool BaseCompressed = RISCV::GPRCRegClass.contains(Base);
+
+    // For loads we can only change the base register since dest is defined
+    // rather than used.
+    if ((!BaseCompressed || NewBaseAdjust) && DestCompressed)
+      return RegImmPair(Base, NewBaseAdjust);
+
+    break;
+  }
+  case RISCV::SW: {
+    const MachineOperand &MOImm = MI.getOperand(2);
+    if (!MOImm.isImm())
+      break;
+
+    int64_t Offset = MOImm.getImm();
+
+    NewBaseAdjust = getBaseAdjustForCompression(Offset);
+
+    Register Base = MI.getOperand(1).getReg();
+    // Store to stack pointer does not have a requirement for either of the
+    // registers to be compressible and the offset can be a 6 bit immediate
+    // scaled by 4.
+    if (RISCV::SPRegClass.contains(Base)) {
+      if (!isShiftedUInt<6, 2>(Offset) && NewBaseAdjust)
+        return RegImmPair(Base, NewBaseAdjust);
+      break;
+    }
+
+    Register Src = MI.getOperand(0).getReg();
+    bool SrcCompressed = RISCV::GPRCRegClass.contains(Src);
+    bool BaseCompressed = RISCV::GPRCRegClass.contains(Base);
+
+    // Cannot resolve uncompressible offset if we are resolving src reg
+    if (!SrcCompressed && (BaseCompressed || Src == Base) && !NewBaseAdjust)
+      return RegImmPair(Src, NewBaseAdjust);
+    if ((!BaseCompressed || NewBaseAdjust) && SrcCompressed)
+      return RegImmPair(Base, NewBaseAdjust);
+
+    break;
+  }
+  }
+  return RegImmPair(Register(0), 0);
+}
+
+// Check all uses after FirstMI of the given register, keeping a vector of
+// instructions that would be compressible if the given register (and offset if
+// applicable) were compressible.
+//
+// If there are enough uses for this optimization to improve code size and a
+// compressed register is available, return that compressed register.
+static Register analyzeCompressibleUses(MachineBasicBlock &MBB,
+                                        MachineInstr &FirstMI,
+                                        RegImmPair RegImm,
+                                        SmallVector<MachineInstr *, 8> &MIs) {
+  RegScavenger RS;
+  RS.enterBasicBlock(MBB);
+
+  for (MachineBasicBlock::instr_iterator I = FirstMI.getIterator(),
+                                         E = MBB.instr_end();
+       I != E; ++I) {
+    MachineInstr &MI = *I;
+
+    // If RegImm.Reg is defined in this instruction then we cannot optimize
+    // past this instruction. If the register is already compressed then it may
+    // possible to optimize a large offset in the current instruction - this
+    // will be detected by the proceeding call to
+    // getRegImmPairPreventingCompression.
+    bool DefinesTargetReg = false;
+    for (const MachineOperand &MO : MI.operands()) {
+      if (MO.isReg() && MO.getReg() == RegImm.Reg && MO.isDef()) {
+        DefinesTargetReg = true;
+        break;
+      }
+    }
+
+    // Determine if this is an instruction which would benefit from using the
+    // new register.
+    RegImmPair CandidateRegImm = getRegImmPairPreventingCompression(MI);
+    if (CandidateRegImm.Reg == RegImm.Reg &&
+        CandidateRegImm.Imm == RegImm.Imm) {
+      // Advance tracking since the value in the new register must be live for
+      // this instruction too.
+      RS.forward(I);
+
+      MIs.push_back(&MI);
+    }
+
+    // This instruction defines RegImm.Reg. Do not optimize any further
+    // instructions.
+    if (DefinesTargetReg)
+      break;
+  }
+
+  // Adjusting the base costs one new uncompressed addi and therefore three uses
+  // are required for a code size reduction. If no base adjustment is required,
+  // then copying the register costs one new c.mv (or c.li Rd, 0 for "copying"
+  // the zero register) and therefore two uses are required for a code size
+  // reduction.
+  if (MIs.size() < 2 || (RegImm.Imm != 0 && MIs.size() < 3))
+    return Register(0);
+
+  // Find a compressible register which will be available from the first
+  // instruction we care about to the last.
+  return RS.scavengeRegisterBackwards(RISCV::GPRCRegClass,
+                                      FirstMI.getIterator(),
+                                      /*RestoreAfter=*/false, /*SPAdj=*/0,
+                                      /*AllowSpill=*/false);
+}
+
+// Update uses of the old register in the given instruction to the new register.
+// Return false if no further instructions should be updated.
+static bool updateOperands(MachineInstr &MI, RegImmPair OldRegImm,
+                           Register NewReg) {
+  bool UpdatesAllowed = true;
+
+  // Update registers
+  for (MachineOperand &MO : MI.operands())
+    if (MO.isReg() && MO.getReg() == OldRegImm.Reg) {
+      if (MO.isReg() && MO.getReg() == OldRegImm.Reg && MO.isDef()) {
+        assert(MI.getOpcode() == RISCV::LW);
+        // Don't allow any more updates after optimizing LW where OldRegImm.Reg
+        // is defined and don't update this register.
+        UpdatesAllowed = false;
+        continue;
+      }
+      // Update reg
+      MO.setReg(NewReg);
+    }
+
+  // Update offset
+  if (MI.getOpcode() == RISCV::LW || MI.getOpcode() == RISCV::SW) {
+    MachineOperand &MOImm = MI.getOperand(2);
+    int64_t NewOffset = MOImm.getImm() & (CSW_OFFSET_MASK);
+    MOImm.setImm(NewOffset);
+  }
+
+  return UpdatesAllowed;
+}
+
+bool RISCVCompressInstrsOpt::runOnMachineFunction(MachineFunction &Fn) {
+  // This is a size optimization.
+  if (skipFunction(Fn.getFunction()) || !Fn.getFunction().hasOptSize())
+    return false;
+
+  const RISCVSubtarget &STI = Fn.getSubtarget<RISCVSubtarget>();
+  const RISCVInstrInfo &TII = *STI.getInstrInfo();
+
+  // This optimization only makes sense if compressed instructions are emitted.
+  if (!STI.hasStdExtC())
+    return false;
+
+  for (MachineBasicBlock &MBB : Fn) {
+    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
+    for (MachineInstr &MI : MBB) {
+      // Determine if this instruction would otherwise be compressed if not for
+      // an uncompressible register or offset.
+      RegImmPair RegImm = getRegImmPairPreventingCompression(MI);
+      if (!RegImm.Reg && RegImm.Imm == 0)
+        continue;
+
+      // Determine if there is a set of instructions for which replacing this
+      // register with a compressed register (and compressible offset if
+      // applicable) is possible and will allow compression.
+      SmallVector<MachineInstr *, 8> MIs;
+      Register NewReg = analyzeCompressibleUses(MBB, MI, RegImm, MIs);
+      if (!NewReg)
+        continue;
+
+      assert(isInt<12>(RegImm.Imm));
+      BuildMI(MBB, MI, MI.getDebugLoc(), TII.get(RISCV::ADDI), NewReg)
+          .addReg(RegImm.Reg)
+          .addImm(RegImm.Imm);
+
+      // Update the set of instructions to use the compressed register and
+      // compressible offset instead. These instructions should now be
+      // compressible.
+      for (MachineInstr *UpdateMI : MIs)
+        if (!updateOperands(*UpdateMI, RegImm, NewReg))
+          break;
+    }
+  }
+  return true;
+}
+
+/// Returns an instance of the Compress Instructions Optimization pass.
+FunctionPass *llvm::createRISCVCompressInstrsOptPass() {
+  return new RISCVCompressInstrsOpt();
+}
Index: llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -171,7 +171,10 @@
 
 void RISCVPassConfig::addPreSched2() {}
 
-void RISCVPassConfig::addPreEmitPass() { addPass(&BranchRelaxationPassID); }
+void RISCVPassConfig::addPreEmitPass() {
+  addPass(&BranchRelaxationPassID);
+  addPass(createRISCVCompressInstrsOptPass());
+}
 
 void RISCVPassConfig::addPreEmitPass2() {
   addPass(createRISCVExpandPseudoPass());
Index: llvm/test/CodeGen/RISCV/compress-instrs.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/RISCV/compress-instrs.ll
@@ -0,0 +1,143 @@
+; RUN: llc -mtriple=riscv32 -mattr=+c -filetype=obj < %s \
+; RUN:   | llvm-objdump -d --triple=riscv32 --mattr=+c -M no-aliases - \
+; RUN:   | FileCheck -check-prefix=RV32IC %s
+
+define void @store_common_value(i32* %a, i32* %b, i32* %c) optsize {
+; RV32IC-LABEL: <store_common_value>:
+; RV32IC:      c.li a3, 0
+; RV32IC-NEXT: c.sw a3, 0(a0)
+; RV32IC-NEXT: c.sw a3, 0(a1)
+; RV32IC-NEXT: c.sw a3, 0(a2)
+; RV32IC-NEXT: c.jr ra
+entry:
+  store i32 0, i32* %a
+  store i32 0, i32* %b
+  store i32 0, i32* %c
+  ret void
+}
+
+define void @store_common_ptr() optsize {
+; RV32IC-LABEL: <store_common_ptr>:
+; RV32IC:      c.li a0, 1
+; RV32IC-NEXT: c.li a1, 0
+; RV32IC-NEXT: c.sw a0, 0(a1)
+; RV32IC-NEXT: c.li a0, 3
+; RV32IC-NEXT: c.sw a0, 0(a1)
+; RV32IC-NEXT: c.li a0, 5
+; RV32IC-NEXT: c.sw a0, 0(a1)
+; RV32IC-NEXT: c.jr ra
+entry:
+  store volatile i32 1, i32* inttoptr (i32 0 to i32*)
+  store volatile i32 3, i32* inttoptr (i32 0 to i32*)
+  store volatile i32 5, i32* inttoptr (i32 0 to i32*)
+  ret void
+}
+
+define void @load_common_ptr() optsize {
+; RV32IC-LABEL: <load_common_ptr>:
+; RV32IC:      c.li a1, 0
+; RV32IC-NEXT: c.lw a0, 0(a1)
+; RV32IC-NEXT: c.lw a0, 0(a1)
+; RV32IC-NEXT: c.lw a0, 0(a1)
+; RV32IC-NEXT: c.jr ra
+entry:
+  %a = load volatile i32, i32* inttoptr (i32 0 to i32*)
+  %b = load volatile i32, i32* inttoptr (i32 0 to i32*)
+  %c = load volatile i32, i32* inttoptr (i32 0 to i32*)
+  ret void
+}
+
+define void @store_large_offset() optsize {
+; RV32IC-LABEL: <store_large_offset>:
+; RV32IC:      lui a0, 74566
+; RV32IC-NEXT: c.li a1, 1
+; RV32IC-NEXT: addi a2, a0, -640
+; RV32IC-NEXT: c.sw a1, 0(a2)
+; RV32IC-NEXT: c.li a1, 3
+; RV32IC-NEXT: c.sw a1, 4(a2)
+; RV32IC-NEXT: c.li a1, 5
+; RV32IC-NEXT: c.sw a1, 8(a2)
+; RV32IC-NEXT: c.li a1, 7
+; RV32IC-NEXT: c.sw a1, 12(a2)
+; RV32IC-NEXT: c.jr ra
+entry:
+  store volatile i32 1, i32* inttoptr (i32 305421696 to i32*)
+  store volatile i32 3, i32* inttoptr (i32 305421700 to i32*)
+  store volatile i32 5, i32* inttoptr (i32 305421704 to i32*)
+  store volatile i32 7, i32* inttoptr (i32 305421708 to i32*)
+  ret void
+}
+
+
+define void @load_large_offset() optsize {
+; RV32IC-LABEL: <load_large_offset>:
+; RV32IC:      lui a0, 74566
+; RV32IC-NEXT: addi a2, a0, -640
+; RV32IC-NEXT: c.lw a1, 0(a2)
+; RV32IC-NEXT: c.lw a1, 4(a2)
+; RV32IC-NEXT: c.lw a1, 8(a2)
+; RV32IC-NEXT: c.lw a0, 12(a2)
+; RV32IC-NEXT: c.jr ra
+entry:
+  %a = load volatile i32, i32* inttoptr (i32 305421696 to i32*)
+  %b = load volatile i32, i32* inttoptr (i32 305421700 to i32*)
+  %c = load volatile i32, i32* inttoptr (i32 305421704 to i32*)
+  %d = load volatile i32, i32* inttoptr (i32 305421708 to i32*)
+  ret void
+}
+
+; The following cases should not be optimized as it would increase code size.
+
+define void @store_common_value_no_opt(i32* %a, i32* %b, i32* %c) optsize {
+; RV32IC-LABEL: <store_common_value_no_opt>:
+; RV32IC-NEXT: sw zero, 0(a0)
+; RV32IC-NEXT: c.jr ra
+entry:
+  store i32 0, i32* %a
+  ret void
+}
+
+define void @store_common_ptr_no_opt() optsize {
+; RV32IC-LABEL: <store_common_ptr_no_opt>:
+; RV32IC:      c.li a0, 1
+; RV32IC-NEXT: sw a0, 0(zero)
+; RV32IC-NEXT: c.jr ra
+entry:
+  store volatile i32 1, i32* inttoptr (i32 0 to i32*)
+  ret void
+}
+
+define void @load_common_ptr_no_opt() optsize {
+; RV32IC-LABEL: <load_common_ptr_no_opt>:
+; RV32IC-NEXT: lw a0, 0(zero)
+; RV32IC-NEXT: c.jr ra
+entry:
+  %a = load volatile i32, i32* inttoptr (i32 0 to i32*)
+  ret void
+}
+
+define void @store_large_offset_no_opt() optsize {
+; RV32IC-LABEL: <store_large_offset_no_opt>:
+; RV32IC:      lui a0, 74566
+; RV32IC-NEXT: c.li a1, 1
+; RV32IC-NEXT: sw a1, -640(a0)
+; RV32IC-NEXT: c.li a1, 3
+; RV32IC-NEXT: sw a1, -636(a0)
+; RV32IC-NEXT: c.jr ra
+entry:
+  store volatile i32 1, i32* inttoptr (i32 305421696 to i32*)
+  store volatile i32 3, i32* inttoptr (i32 305421700 to i32*)
+  ret void
+}
+
+define void @load_large_offset_no_opt() optsize {
+; RV32IC-LABEL: <load_large_offset_no_opt>:
+; RV32IC:      lui a0, 74566
+; RV32IC-NEXT: lw a1, -640(a0)
+; RV32IC-NEXT: lw a0, -636(a0)
+; RV32IC-NEXT: c.jr ra
+entry:
+  %a = load volatile i32, i32* inttoptr (i32 305421696 to i32*)
+  %b = load volatile i32, i32* inttoptr (i32 305421700 to i32*)
+  ret void
+}