diff --git a/lld/ELF/Arch/RISCV.cpp b/lld/ELF/Arch/RISCV.cpp
--- a/lld/ELF/Arch/RISCV.cpp
+++ b/lld/ELF/Arch/RISCV.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "InputFiles.h"
+#include "OutputSections.h"
 #include "Symbols.h"
 #include "SyntheticSections.h"
 #include "Target.h"
@@ -36,6 +37,7 @@
                      const uint8_t *loc) const override;
   void relocate(uint8_t *loc, const Relocation &rel,
                 uint64_t val) const override;
+  void finalizeSections() const override;
 };
 
 } // end anonymous namespace
@@ -271,12 +273,7 @@
   case R_RISCV_TPREL_ADD:
     return R_NONE;
   case R_RISCV_ALIGN:
-    // Not just a hint; always padded to the worst-case number of NOPs, so may
-    // not currently be aligned, and without linker relaxation support we can't
-    // delete NOPs to realign.
-    errorOrWarn(getErrorLocation(loc) + "relocation R_RISCV_ALIGN requires "
-                "unimplemented linker relaxation; recompile with -mno-relax");
-    return R_NONE;
+    return R_RELAX_HINT;
   default:
     error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
           ") against symbol " + toString(s));
@@ -476,6 +473,87 @@
   }
 }
 
+using DeleteRanges = std::vector<InputSectionBase::DeleteRange>;
+// Appends the byte range [offset, offset + size) to `ranges`.
+static void addDeleteRange(DeleteRanges &ranges, uint64_t offset,
+                           uint64_t size) {
+  ranges.push_back(InputSectionBase::DeleteRange{offset, size});
+}
+
+// Resolves R_RISCV_ALIGN in executable input sections: keeps only the NOP
+// padding needed to reach the requested alignment and deletes the rest.
+static void relaxAlign() {
+  const bool rvc = config->eflags & EF_RISCV_RVC;
+  // Without RVC the only NOP encoding available is the 4-byte one.
+  const uint64_t minNopWidth = rvc ? 2 : 4;
+
+  for (OutputSection *os : outputSections) {
+    for (InputSection *is : getInputSections(*os)) {
+      if (!(is->flags & SHF_EXECINSTR))
+        continue;
+
+      uint64_t bytesDeleted = 0;
+      DeleteRanges deleteRanges;
+      for (const Relocation &rel : is->relocations) {
+        if (rel.type != R_RISCV_ALIGN || rel.addend == 0)
+          continue;
+        // Account for bytes already queued for deletion ahead of this pad.
+        uint64_t pc = is->getVA(rel.offset) - bytesDeleted;
+        uint64_t boundary = PowerOf2Ceil(rel.addend + 2);
+        uint64_t keepNopBytes = alignTo(pc, boundary) - pc;
+        if (keepNopBytes > static_cast<uint64_t>(rel.addend)) {
+          errorOrWarn(is->getObjMsg(rel.offset) + ": alignment requires " +
+                      Twine(keepNopBytes) + " of nop, but only " +
+                      Twine(rel.addend) + " bytes are available");
+          break;
+        }
+        if (keepNopBytes % minNopWidth != 0) {
+          errorOrWarn(is->getObjMsg(rel.offset) + ": alignment nop bytes " +
+                      Twine(keepNopBytes) + " must be a multiple of " +
+                      Twine(minNopWidth));
+          break;
+        }
+
+        // Delete the head of the pad and keep its tail. Symbols overlapping
+        // the pad have their address and size adjusted correctly whichever
+        // end is deleted.
+        uint64_t bytesToDelete = rel.addend - keepNopBytes;
+        if (bytesToDelete > 0) {
+          addDeleteRange(deleteRanges, rel.offset, bytesToDelete);
+          bytesDeleted += bytesToDelete;
+        }
+
+        // The compiler's NOP layout may not survive the deletion (e.g. a
+        // 4-byte NOP could be cut in half). Rather than decode it, rewrite the
+        // bytes that are kept, i.e. those after the deleted head, with the
+        // optimal sequence of NOPs.
+        uint8_t *buf = is->mutableData().data() + rel.offset + bytesToDelete;
+        for (; keepNopBytes >= 4; keepNopBytes -= 4, buf += 4)
+          write32le(buf, 0x00000013); // nop
+        if (keepNopBytes == 2) {
+          assert(rvc && "expected RVC for 2-byte NOP");
+          write16le(buf, 0x0001); // c.nop
+        }
+      }
+
+      // Nothing was deleted, so addresses are unchanged; skip the relayout.
+      if (deleteRanges.empty())
+        continue;
+      is->deleteRanges(deleteRanges);
+      // Refresh addresses so that later getVA() calls see the smaller section.
+      script->assignAddresses();
+    }
+  }
+}
+
+// Target hook called by the writer; for RISC-V it resolves R_RISCV_ALIGN
+// padding.
+void RISCV::finalizeSections() const {
+  // Relaxation is only performed for a final link, not a relocatable one.
+  if (!config->relocatable)
+    relaxAlign();
+}
+
 TargetInfo *elf::getRISCVTargetInfo() {
   static RISCV target;
   return &target;
diff --git a/lld/ELF/InputSection.h b/lld/ELF/InputSection.h
--- a/lld/ELF/InputSection.h
+++ b/lld/ELF/InputSection.h
@@ -10,7 +10,9 @@
 #define LLD_ELF_INPUT_SECTION_H
 
 #include "Relocations.h"
+#include "lld/Common/CommonLinkerContext.h"
 #include "lld/Common/LLVM.h"
+#include "lld/Common/Memory.h"
 #include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/TinyPtrVector.h"
@@ -149,6 +151,7 @@
     bytesDropped -= num;
   }
 
+  mutable bool copiedData = false;
   mutable ArrayRef<uint8_t> rawData;
 
   void trim() {
@@ -164,6 +167,29 @@
     return rawData;
   }
 
+  // Returns a writable view of the contents. The first call copies them into
+  // memory from context().bAlloc and repoints rawData there; later calls
+  MutableArrayRef<uint8_t> mutableData() const { // reuse that copy.
+    if (copiedData)
+      return {const_cast<uint8_t *>(rawData.data()), rawData.size()};
+    ArrayRef<uint8_t> src = data();
+    uint8_t *copy = context().bAlloc.Allocate<uint8_t>(src.size());
+    memcpy(copy, src.data(), src.size());
+    rawData = llvm::makeArrayRef(copy, src.size());
+    copiedData = true;
+    return {copy, src.size()};
+  }
+
+  // A byte range to delete from the section contents: [offset, offset + size).
+  struct DeleteRange {
+    uint64_t offset; // start of the range, relative to the section start
+    uint64_t size;   // number of bytes to delete
+  };
+
+  // Delete ranges and adjust section content, symbols and relocations.
+  // The deleteRanges must be sorted by offset and must not overlap.
+  void deleteRanges(ArrayRef<DeleteRange> deleteRanges);
+
   // The next member in the section group if this section is in a group. This is
   // used by --gc-sections.
   InputSectionBase *nextInSectionGroup = nullptr;
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp
--- a/lld/ELF/InputSection.cpp
+++ b/lld/ELF/InputSection.cpp
@@ -150,6 +150,76 @@
   return ret;
 }
 
+// One endpoint (start or end) of a defined symbol. Every symbol is wrapped by
+// two of these so that all endpoints can live in a single vector and be
+// queried and decremented individually.
+struct SymbolAddr {
+  Defined *d;
+  bool isStart;
+
+  uint64_t address() const { return isStart ? d->value : d->value + d->size; }
+  // Moves this endpoint down by x bytes.
+  void decrement(uint64_t x) {
+    if (!isStart) {
+      d->size -= x;
+      return;
+    }
+    d->value -= x;
+    d->size += x; // keep the end address in place; it is decremented later
+  }
+};
+
+// Deletes `ranges` (sorted by offset, non-overlapping) from the contents and
+// moves this section's symbols and relocations down to match.
+void InputSectionBase::deleteRanges(ArrayRef<DeleteRange> ranges) {
+  if (ranges.empty())
+    return;
+  // shift[k] is the number of bytes deleted by the first k ranges.
+  SmallVector<uint64_t> shift = {0};
+  for (size_t k = 0; k != ranges.size(); ++k) {
+    assert(k == 0 ||
+           ranges[k].offset >= ranges[k - 1].offset + ranges[k - 1].size);
+    shift.push_back(shift.back() + ranges[k].size);
+  }
+  // Give every symbol defined in this section a start and an end entry.
+  SmallVector<SymbolAddr> symbolAddrs;
+  for (Symbol *sym : file->getSymbols())
+    if (auto *d = dyn_cast<Defined>(sym))
+      if (d->section == this) {
+        symbolAddrs.push_back({d, /*isStart*/ true});
+        symbolAddrs.push_back({d, /*isStart*/ false});
+      }
+  // Starts precede ends at equal addresses; must be a strict weak ordering.
+  llvm::sort(symbolAddrs, [](const SymbolAddr &a, const SymbolAddr &b) {
+    if (a.address() == b.address())
+      return a.isStart && !b.isStart;
+    return a.address() < b.address();
+  });
+  // Move each entry down by the ranges that start below its address.
+  size_t i = 0;
+  for (SymbolAddr &sa : symbolAddrs) {
+    while (i < ranges.size() && ranges[i].offset < sa.address())
+      ++i;
+    // The previous DeleteRange should not span a symbol start/end address
+    assert(i == 0 || sa.address() >= ranges[i - 1].offset + ranges[i - 1].size);
+    sa.decrement(shift[i]);
+  }
+  // Relocations move the same way; they need not be sorted by offset.
+  for (Relocation &rel : relocations) {
+    auto below = [&](const DeleteRange &r) { return r.offset < rel.offset; };
+    rel.offset -= shift[llvm::partition_point(ranges, below) - ranges.begin()];
+  }
+  // Slide the surviving bytes down over the deleted ones, then shrink rawData.
+  MutableArrayRef<uint8_t> buf = this->mutableData();
+  uint8_t *dst = buf.begin() + ranges.front().offset;
+  for (size_t k = 0; k != ranges.size(); ++k) {
+    uint64_t next = k + 1 != ranges.size() ? ranges[k + 1].offset : buf.size();
+    dst = std::copy(buf.begin() + ranges[k].offset + ranges[k].size,
+                    buf.begin() + next, dst);
+  }
+  rawData = makeArrayRef(data().data(), dst);
+}
+
 uint64_t SectionBase::getOffset(uint64_t offset) const {
   switch (kind()) {
   case Output: {
@@ -997,7 +1067,7 @@
   AArch64Relaxer aarch64relaxer(relocations);
   for (size_t i = 0, size = relocations.size(); i != size; ++i) {
     const Relocation &rel = relocations[i];
-    if (rel.expr == R_NONE)
+    if (rel.expr == R_NONE || rel.expr == R_RELAX_HINT)
       continue;
     uint64_t offset = rel.offset;
     uint8_t *bufLoc = buf + offset;
diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h
--- a/lld/ELF/Relocations.h
+++ b/lld/ELF/Relocations.h
@@ -46,6 +46,7 @@
   R_PLT,
   R_PLT_PC,
   R_PLT_GOTPLT,
+  R_RELAX_HINT,
   R_RELAX_GOT_PC,
   R_RELAX_GOT_PC_NOPIC,
   R_RELAX_TLS_GD_TO_IE,
diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp
--- a/lld/ELF/Relocations.cpp
+++ b/lld/ELF/Relocations.cpp
@@ -960,7 +960,8 @@
             R_MIPS_GOT_OFF, R_MIPS_GOT_OFF32, R_MIPS_GOT_GP_PC,
             R_AARCH64_GOT_PAGE_PC, R_GOT_PC, R_GOTONLY_PC, R_GOTPLTONLY_PC,
             R_PLT_PC, R_PLT_GOTPLT, R_PPC32_PLTREL, R_PPC64_CALL_PLT,
-            R_PPC64_RELAX_TOC, R_RISCV_ADD, R_AARCH64_GOT_PAGE>(e))
+            R_PPC64_RELAX_TOC, R_RISCV_ADD, R_RELAX_HINT, R_AARCH64_GOT_PAGE>(
+          e))
     return true;
 
   // These never do, except if the entire file is position dependent or if
diff --git a/lld/ELF/Target.h b/lld/ELF/Target.h
--- a/lld/ELF/Target.h
+++ b/lld/ELF/Target.h
@@ -92,6 +92,8 @@
   virtual void applyJumpInstrMod(uint8_t *loc, JumpModType type,
                                  JumpModType val) const {}
 
+  virtual void finalizeSections() const {} // Target hook; no-op by default.
+
   virtual ~TargetInfo();
 
   // This deletes a jump insn at the end of the section if it is a fall thru to
diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp
--- a/lld/ELF/Writer.cpp
+++ b/lld/ELF/Writer.cpp
@@ -1630,6 +1630,8 @@
   if (config->emachine == EM_HEXAGON)
     hexagonTLSSymbolUpdate(outputSections);
 
+  target->finalizeSections();
+
   int assignPasses = 0;
   for (;;) {
     bool changed = target->needsThunks && tc.createThunks(outputSections);
diff --git a/lld/test/ELF/riscv-relax-align-rvc.s b/lld/test/ELF/riscv-relax-align-rvc.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-align-rvc.s
@@ -0,0 +1,43 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && mkdir -p %t
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+c,+relax %s -o %t/rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+c,+relax %s -o %t/rv64.o
+
+# Check that alignment is always handled, regardless of --relax option
+
+# RUN: ld.lld %t/rv32.o -o %t/relax.rv32
+# RUN: ld.lld %t/rv64.o -o %t/relax.rv64
+# RUN: llvm-objdump -d -M no-aliases %t/relax.rv32 > %t/relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases %t/relax.rv64 > %t/relax.rv64.dis
+# RUN: FileCheck %s < %t/relax.rv32.dis
+# RUN: FileCheck %s < %t/relax.rv64.dis
+
+# RUN: ld.lld --no-relax %t/rv32.o -o %t/no-relax.rv32
+# RUN: ld.lld --no-relax %t/rv64.o -o %t/no-relax.rv64
+# RUN: llvm-objdump -d -M no-aliases %t/no-relax.rv32 > %t/no-relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases %t/no-relax.rv64 > %t/no-relax.rv64.dis
+# RUN: FileCheck %s < %t/no-relax.rv32.dis
+# RUN: FileCheck %s < %t/no-relax.rv64.dis
+
+# CHECK:      c.add   a0, a1
+# CHECK-NEXT: addi    zero, zero, 0
+# CHECK-NEXT: addi    zero, zero, 0
+# CHECK-NEXT: addi    zero, zero, 0
+# CHECK-NEXT: c.nop
+# CHECK-NEXT: c.add   s0, s1
+# CHECK-NEXT: c.add   s2, s3
+# CHECK-NEXT: c.add   s4, s5
+# CHECK-NEXT: c.nop
+# CHECK-NEXT: c.add   t0, t1
+
+.global _start
+_start:
+.balign 4
+  c.add a0, a1
+.balign 16
+  c.add s0, s1
+  c.add s2, s3
+  c.add s4, s5
+.balign 8
+  c.add t0, t1
diff --git a/lld/test/ELF/riscv-relax-align.s b/lld/test/ELF/riscv-relax-align.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-align.s
@@ -0,0 +1,39 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && mkdir -p %t
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf -mattr=+relax %s -o %t/rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf -mattr=+relax %s -o %t/rv64.o
+
+# Check that alignment is always handled regardless of --relax option
+
+# RUN: ld.lld %t/rv32.o -o %t/relax.rv32
+# RUN: ld.lld %t/rv64.o -o %t/relax.rv64
+# RUN: llvm-objdump -d -M no-aliases %t/relax.rv32 > %t/relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases %t/relax.rv64 > %t/relax.rv64.dis
+# RUN: FileCheck %s < %t/relax.rv32.dis
+# RUN: FileCheck %s < %t/relax.rv64.dis
+
+# RUN: ld.lld --no-relax %t/rv32.o -o %t/no-relax.rv32
+# RUN: ld.lld --no-relax %t/rv64.o -o %t/no-relax.rv64
+# RUN: llvm-objdump -d -M no-aliases %t/no-relax.rv32 > %t/no-relax.rv32.dis
+# RUN: llvm-objdump -d -M no-aliases %t/no-relax.rv64 > %t/no-relax.rv64.dis
+# RUN: FileCheck %s < %t/no-relax.rv32.dis
+# RUN: FileCheck %s < %t/no-relax.rv64.dis
+
+# CHECK:      add     a0, a1, a2
+# CHECK-NEXT: add     a3, a4, a5
+# CHECK-NEXT: addi    zero, zero, 0
+# CHECK-NEXT: addi    zero, zero, 0
+# CHECK-NEXT: add     s0, s1, s2
+# CHECK-NEXT: add     t0, t1, t2
+
+.global _start
+_start:
+.balign 4
+  add a0, a1, a2
+  add a3, a4, a5
+.balign 16
+  add s0, s1, s2
+.balign 4
+.balign 4
+  add t0, t1, t2
diff --git a/lld/test/ELF/riscv-relax-syms.s b/lld/test/ELF/riscv-relax-syms.s
new file mode 100644
--- /dev/null
+++ b/lld/test/ELF/riscv-relax-syms.s
@@ -0,0 +1,44 @@
+# REQUIRES: riscv
+# RUN: rm -rf %t && mkdir -p %t
+
+# Check that relaxation correctly adjusts symbol addresses and sizes.
+
+# RUN: llvm-mc -filetype=obj -triple=riscv32-unknown-elf %s -o %t/rv32.o
+# RUN: llvm-mc -filetype=obj -triple=riscv64-unknown-elf %s -o %t/rv64.o
+# RUN: ld.lld -Ttext=0x100000 %t/rv32.o -o %t/rv32
+# RUN: ld.lld -Ttext=0x100000 %t/rv64.o -o %t/rv64
+
+# RUN: llvm-readelf -s %t/rv32 > %t/rv32.dis
+# RUN: llvm-readelf -s %t/rv64 > %t/rv64.dis
+# RUN: FileCheck %s < %t/rv32.dis
+# RUN: FileCheck %s < %t/rv64.dis
+
+# CHECK: 100000     4 NOTYPE  LOCAL  DEFAULT     1 a
+# CHECK: 100000    12 NOTYPE  LOCAL  DEFAULT     1 b
+# CHECK: 100004     8 NOTYPE  LOCAL  DEFAULT     1 c
+# CHECK: 100008    12 NOTYPE  LOCAL  DEFAULT     1 d
+# CHECK: 10000c     8 NOTYPE  LOCAL  DEFAULT     1 e
+# CHECK: 100010     4 NOTYPE  LOCAL  DEFAULT     1 f
+# CHECK: 100000    20 NOTYPE  GLOBAL DEFAULT     1 _start
+
+.global _start
+.balign 4
+_start:                         # 0
+a:
+b:
+    add  a0, a1, a2             # [0..4)
+.size a, .-a                    # 4
+c:
+    add s0, s1, s2              # [4..8)
+d:
+    add t0, t1, t2              # [8..12)
+.size b, .-b                    # 12
+.size c, .-c
+e:
+.balign 16
+f:                             # 16
+    add x0, x1, x2             # [16..20)
+.size d, .-d                   # 20
+.size e, .-e                   # 20
+.size f, .-f                   # 20
+.size _start, .-_start
diff --git a/lld/test/ELF/riscv-reloc-align.s b/lld/test/ELF/riscv-reloc-align.s
deleted file mode 100644
--- a/lld/test/ELF/riscv-reloc-align.s
+++ /dev/null
@@ -1,12 +0,0 @@
-# REQUIRES: riscv
-
-# RUN: llvm-mc -filetype=obj -triple=riscv32 -mattr=+relax %s -o %t.o
-# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s
-
-# CHECK: relocation R_RISCV_ALIGN requires unimplemented linker relaxation
-
-.global _start
-_start:
-    nop
-    .balign 8
-    nop