Index: llvm/include/llvm/MC/MCAssembler.h
===================================================================
--- llvm/include/llvm/MC/MCAssembler.h
+++ llvm/include/llvm/MC/MCAssembler.h
@@ -193,7 +193,8 @@
   bool relaxInstruction(MCAsmLayout &Layout, MCRelaxableFragment &IF);
 
   bool relaxLEB(MCAsmLayout &Layout, MCLEBFragment &IF);
-
+  bool relaxBoundaryAlign(MCAsmLayout &Layout,
+                          MCBoundaryAlignFragment &MF);
   bool relaxDwarfLineAddr(MCAsmLayout &Layout, MCDwarfLineAddrFragment &DF);
   bool relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
                                    MCDwarfCallFrameFragment &DF);
Index: llvm/include/llvm/MC/MCFragment.h
===================================================================
--- llvm/include/llvm/MC/MCFragment.h
+++ llvm/include/llvm/MC/MCFragment.h
@@ -41,6 +41,7 @@
     FT_Dwarf,
     FT_DwarfFrame,
     FT_LEB,
+    FT_BoundaryAlign,
     FT_SymbolId,
     FT_CVInlineLines,
     FT_CVDefRange,
@@ -563,6 +564,42 @@
   }
 };
 
+/// Represents the padding required so that the region of instructions
+/// running from the fragment one past this one through the designated
+/// fragment (inclusive) neither crosses nor ends at a power-of-two boundary.
+class MCBoundaryAlignFragment : public MCFragment {
+private:
+  /// The padding size in bytes.  Lazily populated during relaxation.
+  unsigned Size = 0;
+  /// The fragment which is the inclusive end of the region being aligned
+  /// w.r.t. the specified boundary.  (The beginning is the fragment
+  /// immediately following this one.)
+  const MCFragment *Fragment = nullptr;
+  /// The boundary which must not be crossed.  Must be a power of two.
+  unsigned AlignBoundarySize = 0;
+
+public:
+  explicit MCBoundaryAlignFragment(unsigned AlignBoundarySize,
+                                   MCSection *Sec = nullptr)
+      : MCFragment(FT_BoundaryAlign, false, Sec),
+        AlignBoundarySize(AlignBoundarySize) {}
+
+  unsigned getBoundarySize() const { return AlignBoundarySize; }
+
+  void setSize(unsigned Value) { Size = Value; }
+  uint64_t getSize() const { return Size; }
+
+  void setFragment(const MCFragment *Target) {
+    assert(Target && Target->getParent() == getParent() &&
+           "same section expected");
+    Fragment = Target;
+  }
+  const MCFragment *getFragment() const { return Fragment; }
+
+  static bool classof(const MCFragment *F) {
+    return F->getKind() == MCFragment::FT_BoundaryAlign;
+  }
+};
 } // end namespace llvm
 
 #endif // LLVM_MC_MCFRAGMENT_H
Index: llvm/include/llvm/MC/MCObjectStreamer.h
===================================================================
--- llvm/include/llvm/MC/MCObjectStreamer.h
+++ llvm/include/llvm/MC/MCObjectStreamer.h
@@ -46,6 +46,7 @@
         : Sym(McSym), Fixup(McFixup), DF(F) {}
   };
   SmallVector<PendingMCFixup, 2> PendingFixups;
+  MCBoundaryAlignFragment *PendingBA = nullptr;
 
   virtual void EmitInstToData(const MCInst &Inst, const MCSubtargetInfo&) = 0;
   void EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override;
@@ -120,6 +121,8 @@
   /// can change its size during relaxation.
   virtual void EmitInstToFragment(const MCInst &Inst, const MCSubtargetInfo &);
 
+  void EmitBoundaryAlign(unsigned AlignPow2) override;
+  void EmitBoundaryAlignEnd() override;
   void EmitBundleAlignMode(unsigned AlignPow2) override;
   void EmitBundleLock(bool AlignToEnd) override;
   void EmitBundleUnlock() override;
Index: llvm/include/llvm/MC/MCStreamer.h
===================================================================
--- llvm/include/llvm/MC/MCStreamer.h
+++ llvm/include/llvm/MC/MCStreamer.h
@@ -994,6 +994,12 @@
   /// Emit the given \p Instruction into the current section.
   virtual void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI);
 
+  /// Mark the region between .boundary_align N and .boundary_align_end as one
+  /// that must not cross (or end at) a 2^N-byte boundary; \p BoundaryPow2 is
+  /// N.  EmitBoundaryAlign opens the region, EmitBoundaryAlignEnd closes it.
+  virtual void EmitBoundaryAlign(unsigned BoundaryPow2);
+  virtual void EmitBoundaryAlignEnd();
+
   /// Set the bundle alignment mode from now on in the section.
   /// The argument is the power of 2 to which the alignment is set. The
   /// value 0 means turn the bundle alignment off.
Index: llvm/lib/MC/MCAsmStreamer.cpp
===================================================================
--- llvm/lib/MC/MCAsmStreamer.cpp
+++ llvm/lib/MC/MCAsmStreamer.cpp
@@ -333,6 +333,8 @@
 
   void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
 
+  void EmitBoundaryAlign(unsigned BoundaryPow2) override;
+  void EmitBoundaryAlignEnd() override;
   void EmitBundleAlignMode(unsigned AlignPow2) override;
   void EmitBundleLock(bool AlignToEnd) override;
   void EmitBundleUnlock() override;
@@ -1955,6 +1957,16 @@
   EmitEOL();
 }
 
+void MCAsmStreamer::EmitBoundaryAlign(unsigned BoundaryPow2) {
+  OS << "\t.boundary_align " << BoundaryPow2; // Operand is log2 of boundary.
+  EmitEOL();
+}
+
+void MCAsmStreamer::EmitBoundaryAlignEnd() {
+  OS << "\t.boundary_align_end"; // Closing directive; it takes no operand.
+  EmitEOL();
+}
+
 void MCAsmStreamer::EmitBundleAlignMode(unsigned AlignPow2) {
   OS << "\t.bundle_align_mode " << AlignPow2;
   EmitEOL();
Index: llvm/lib/MC/MCAssembler.cpp
===================================================================
--- llvm/lib/MC/MCAssembler.cpp
+++ llvm/lib/MC/MCAssembler.cpp
@@ -309,6 +309,9 @@
   case MCFragment::FT_LEB:
     return cast<MCLEBFragment>(F).getContents().size();
 
+  case MCFragment::FT_BoundaryAlign:
+    return cast<MCBoundaryAlignFragment>(F).getSize();
+
   case MCFragment::FT_SymbolId:
     return 4;
 
@@ -605,6 +608,15 @@
     break;
   }
 
+  case MCFragment::FT_BoundaryAlign: {
+    if (FragmentSize == 0)
+      break;
+    if (!Asm.getBackend().writeNopData(OS, FragmentSize))
+      report_fatal_error("unable to write nop sequence of " +
+                         Twine(FragmentSize) + " bytes");
+    break;
+  }
+
   case MCFragment::FT_SymbolId: {
     const MCSymbolIdFragment &SF = cast<MCSymbolIdFragment>(F);
     support::endian::write<uint32_t>(OS, SF.getSymbol()->getIndex(), Endian);
@@ -941,6 +953,87 @@
   return OldSize != LF.getContents().size();
 }
 
+/// mayCrossBoundary - Check whether the byte range [StartAddr, StartAddr+Size)
+/// spans more than one BoundarySize-sized window.  Empty ranges never do.
+static bool mayCrossBoundary(uint64_t StartAddr, uint64_t Size,
+                             uint64_t BoundarySize) {
+  const uint64_t LastByte = StartAddr + Size - 1; // Unused when Size == 0.
+  return Size != 0 && StartAddr / BoundarySize != LastByte / BoundarySize;
+}
+
+/// isAgainstBoundary - Check whether the byte range starting at StartAddr with
+/// the given size ends exactly on a multiple of BoundarySize.
+static bool isAgainstBoundary(uint64_t StartAddr, uint64_t Size,
+                              uint64_t BoundarySize) {
+  const uint64_t EndAddr = StartAddr + Size;
+  return EndAddr % BoundarySize == 0;
+}
+
+/// needPadding - Check if a non-empty range crosses a boundary or ends on one.
+static bool needPadding(uint64_t StartAddr, uint64_t Size,
+                        uint64_t BoundarySize) {
+  return Size != 0 && (mayCrossBoundary(StartAddr, Size, BoundarySize) ||
+                       isAgainstBoundary(StartAddr, Size, BoundarySize));
+}
+
+/// getPaddingSize - Get the number of bytes needed to advance StartAddr to the
+/// next multiple of BoundarySize (a full BoundarySize if already aligned).
+static unsigned getPaddingSize(uint64_t StartAddr, unsigned BoundarySize) {
+  return BoundarySize - static_cast<unsigned>(StartAddr % BoundarySize);
+}
+
+/// getInstSize - Get the size in bytes of the encoded contents of \p F.
+// TODO: Can this be replaced with computeFragmentSize?
+static unsigned getInstSize(const MCFragment &F) {
+  switch (F.getKind()) {
+  case MCFragment::FT_Data:
+    return cast<MCDataFragment>(F).getContents().size();
+  case MCFragment::FT_Relaxable:
+    return cast<MCRelaxableFragment>(F).getContents().size();
+  case MCFragment::FT_CompactEncodedInst:
+    return cast<MCCompactEncodedInstFragment>(F).getContents().size();
+  default: // Reachable from user input, so fail cleanly instead of UB.
+    report_fatal_error("unsupported fragment kind in .boundary_align region");
+  }
+}
+
+/// Return the total size in bytes of all the fragments within the inclusive
+/// range [Begin, End].  \p End must be reachable from \p Begin.
+static unsigned getSizeOfFragmentRange(const MCFragment *Begin,
+                                       const MCFragment *End) {
+  unsigned Sum = 0;
+  for (const MCFragment *F = Begin; F; F = F->getNextNode()) {
+    Sum += getInstSize(*F);
+    if (F == End)
+      return Sum;
+  }
+  report_fatal_error("end of .boundary_align region not found");
+}
+  
+
+/// Recompute the padding carried by \p MF so that the region it guards neither
+/// crosses nor ends at its boundary.  Returns true if the size changed.
+bool MCAssembler::relaxBoundaryAlign(MCAsmLayout &Layout,
+                                     MCBoundaryAlignFragment &MF) {
+  const MCFragment *RegionEnd = MF.getFragment();
+  if (!RegionEnd)
+    return false; // No end fragment recorded; nothing to align.
+  const MCFragment *RegionBegin = MF.getNextNode();
+  unsigned BoundarySize = MF.getBoundarySize();
+  unsigned OldSize = MF.getSize();
+  unsigned AlignedSize = getSizeOfFragmentRange(RegionBegin, RegionEnd);
+  assert(AlignedSize <= BoundarySize && "region cannot fit within boundary");
+  // Offset the region would start at if MF currently contributed no padding.
+  uint64_t AlignedOffset = Layout.getFragmentOffset(RegionBegin) - OldSize;
+  unsigned NewSize = 0;
+  if (needPadding(AlignedOffset, AlignedSize, BoundarySize)) {
+    NewSize = getPaddingSize(AlignedOffset, BoundarySize);
+    assert(NewSize < BoundarySize);
+  }
+  MF.setSize(NewSize);
+  return NewSize != OldSize;
+}
+
 bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
                                      MCDwarfLineAddrFragment &DF) {
   MCContext &Context = Layout.getAssembler().getContext();
@@ -1057,6 +1150,10 @@
     case MCFragment::FT_LEB:
       RelaxedFrag = relaxLEB(Layout, *cast<MCLEBFragment>(I));
       break;
+    case MCFragment::FT_BoundaryAlign:
+      RelaxedFrag =
+          relaxBoundaryAlign(Layout, *cast<MCBoundaryAlignFragment>(I));
+      break;
     case MCFragment::FT_CVInlineLines:
       RelaxedFrag =
           relaxCVInlineLineTable(Layout, *cast<MCCVInlineLineTableFragment>(I));
Index: llvm/lib/MC/MCFragment.cpp
===================================================================
--- llvm/lib/MC/MCFragment.cpp
+++ llvm/lib/MC/MCFragment.cpp
@@ -275,6 +275,9 @@
     case FT_LEB:
       delete cast<MCLEBFragment>(this);
       return;
+    case FT_BoundaryAlign:
+      delete cast<MCBoundaryAlignFragment>(this);
+      return;
     case FT_SymbolId:
       delete cast<MCSymbolIdFragment>(this);
       return;
@@ -319,6 +322,7 @@
   case MCFragment::FT_Dwarf: OS << "MCDwarfFragment"; break;
   case MCFragment::FT_DwarfFrame: OS << "MCDwarfCallFrameFragment"; break;
   case MCFragment::FT_LEB:   OS << "MCLEBFragment"; break;
+  case MCFragment::FT_BoundaryAlign: OS<<"MCBoundaryAlignFragment"; break;
   case MCFragment::FT_SymbolId:    OS << "MCSymbolIdFragment"; break;
   case MCFragment::FT_CVInlineLines: OS << "MCCVInlineLineTableFragment"; break;
   case MCFragment::FT_CVDefRange: OS << "MCCVDefRangeTableFragment"; break;
@@ -418,6 +422,15 @@
     OS << " Value:" << LF->getValue() << " Signed:" << LF->isSigned();
     break;
   }
+  case MCFragment::FT_BoundaryAlign: {
+    const MCBoundaryAlignFragment *MF =
+        cast<MCBoundaryAlignFragment>(this);
+    OS << "\n       ";
+    OS << " Size:" << MF->getSize();
+    OS << " AlignBoundarySize:" << MF->getBoundarySize();
+    OS << " Fragment:" << MF->getFragment();
+    break;
+  }
   case MCFragment::FT_SymbolId: {
     const MCSymbolIdFragment *F = cast<MCSymbolIdFragment>(this);
     OS << "\n       ";
Index: llvm/lib/MC/MCObjectStreamer.cpp
===================================================================
--- llvm/lib/MC/MCObjectStreamer.cpp
+++ llvm/lib/MC/MCObjectStreamer.cpp
@@ -371,6 +371,50 @@
   IF->getContents().append(Code.begin(), Code.end());
 }
 
+/// Open a .boundary_align region: insert the fragment that will carry the
+/// padding and remember it until the matching .boundary_align_end.
+void MCObjectStreamer::EmitBoundaryAlign(unsigned BoundaryPow2) {
+  if (PendingBA)
+    report_fatal_error("Nested .boundary_align regions not supported");
+  assert(BoundaryPow2 < 32 && "boundary exponent out of range");
+  // The directive operand is log2 of the boundary size in bytes.
+  const unsigned Boundary = 1U << BoundaryPow2;
+  auto *F = new MCBoundaryAlignFragment(Boundary);
+  insert(F);
+  PendingBA = F;
+}
+/// Close the pending .boundary_align region: record the last fragment of the
+/// region on the pending MCBoundaryAlignFragment so that relaxation can size
+/// the padding, and make sure the section is aligned to the boundary.
+void MCObjectStreamer::EmitBoundaryAlignEnd() {
+  if (!PendingBA)
+    report_fatal_error("Missing .boundary_align before .boundary_align_end");
+
+  MCBoundaryAlignFragment *F = PendingBA;
+  PendingBA = nullptr;
+
+  // If the current fragment is still the boundary-align fragment itself, the
+  // region is empty and needs no padding.  Leaving it unlinked also keeps
+  // relaxation from walking forward in search of an end fragment that does
+  // not follow the start of the region.
+  MCFragment *CF = getCurrentFragment();
+  if (!CF || CF == F)
+    return;
+
+  // Link it for later relaxation - this allows the padding for alignment to
+  // actually be computed and emitted.
+  F->setFragment(CF);
+
+  // Update the maximum alignment on the current section if necessary.
+  MCSection *Sec = getCurrentSectionOnly();
+  unsigned BoundarySize = F->getBoundarySize();
+  if (BoundarySize > Sec->getAlignment())
+    Sec->setAlignment(Align(BoundarySize));
+
+  // Break the last fragment so that more instructions can't be pushed into
+  // it.
+  insert(new MCDataFragment());
+}
 #ifndef NDEBUG
 static const char *const BundlingNotImplementedMsg =
   "Aligned bundling is not implemented for this object format";
Index: llvm/lib/MC/MCParser/AsmParser.cpp
===================================================================
--- llvm/lib/MC/MCParser/AsmParser.cpp
+++ llvm/lib/MC/MCParser/AsmParser.cpp
@@ -412,6 +412,8 @@
     DK_ORG,
     DK_FILL,
     DK_ENDR,
+    DK_BOUNDARY_ALIGN,
+    DK_BOUNDARY_ALIGN_END,
     DK_BUNDLE_ALIGN_MODE,
     DK_BUNDLE_LOCK,
     DK_BUNDLE_UNLOCK,
@@ -602,6 +604,9 @@
   bool parseDirectiveMacrosOnOff(StringRef Directive);
   // alternate macro mode directives
   bool parseDirectiveAltmacro(StringRef Directive);
+  // ".boundary_align", ".boundary_align_end"
+  bool parseDirectiveBoundaryAlign();
+  bool parseDirectiveBoundaryAlignEnd();
   // ".bundle_align_mode"
   bool parseDirectiveBundleAlignMode();
   // ".bundle_lock"
@@ -2042,6 +2047,10 @@
       return parseDirectiveIrpc(IDLoc);
     case DK_ENDR:
       return parseDirectiveEndr(IDLoc);
+    case DK_BOUNDARY_ALIGN:
+      return parseDirectiveBoundaryAlign();
+    case DK_BOUNDARY_ALIGN_END:
+      return parseDirectiveBoundaryAlignEnd();
     case DK_BUNDLE_ALIGN_MODE:
       return parseDirectiveBundleAlignMode();
     case DK_BUNDLE_LOCK:
@@ -4643,6 +4652,34 @@
   return false;
 }
 
+/// parseDirectiveBoundaryAlign
+/// ::= {.boundary_align} expression
+bool AsmParser::parseDirectiveBoundaryAlign() {
+  // The sole operand is an absolute expression giving log2 of the boundary;
+  // it must lie in the inclusive range 0-30.
+  const SMLoc OperandLoc = getLexer().getLoc();
+  int64_t Pow2;
+  if (checkForValidSection() || parseAbsoluteExpression(Pow2))
+    return true;
+  if (parseToken(AsmToken::EndOfStatement,
+                 "unexpected token after expression in '.boundary_align' "
+                 "directive"))
+    return true;
+  if (check(Pow2 < 0 || Pow2 > 30, OperandLoc,
+            "invalid boundary size (expected between 0 and 30)"))
+    return true;
+  // The range check above makes the narrowing to unsigned lossless.
+  getStreamer().EmitBoundaryAlign(static_cast<unsigned>(Pow2));
+  return false;
+}
+
+/// parseDirectiveBoundaryAlignEnd ::= {.boundary_align_end}
+bool AsmParser::parseDirectiveBoundaryAlignEnd() {
+  if (checkForValidSection() || parseToken(AsmToken::EndOfStatement))
+    return true; // No section yet, or trailing tokens after the directive.
+  getStreamer().EmitBoundaryAlignEnd();
+  return false;
+}
 /// parseDirectiveBundleAlignMode
 /// ::= {.bundle_align_mode} expression
 bool AsmParser::parseDirectiveBundleAlignMode() {
@@ -5374,6 +5411,8 @@
   DirectiveKindMap[".irp"] = DK_IRP;
   DirectiveKindMap[".irpc"] = DK_IRPC;
   DirectiveKindMap[".endr"] = DK_ENDR;
+  DirectiveKindMap[".boundary_align"] = DK_BOUNDARY_ALIGN;
+  DirectiveKindMap[".boundary_align_end"] = DK_BOUNDARY_ALIGN_END;
   DirectiveKindMap[".bundle_align_mode"] = DK_BUNDLE_ALIGN_MODE;
   DirectiveKindMap[".bundle_lock"] = DK_BUNDLE_LOCK;
   DirectiveKindMap[".bundle_unlock"] = DK_BUNDLE_UNLOCK;
Index: llvm/lib/MC/MCStreamer.cpp
===================================================================
--- llvm/lib/MC/MCStreamer.cpp
+++ llvm/lib/MC/MCStreamer.cpp
@@ -1094,6 +1094,8 @@
                                    unsigned MaxBytesToEmit) {}
 void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value,
                                    SMLoc Loc) {}
+void MCStreamer::EmitBoundaryAlign(unsigned BoundaryPow2) {} // No-op default.
+void MCStreamer::EmitBoundaryAlignEnd() {} // No-op default.
 void MCStreamer::EmitBundleAlignMode(unsigned AlignPow2) {}
 void MCStreamer::EmitBundleLock(bool AlignToEnd) {}
 void MCStreamer::FinishImpl() {}
Index: llvm/test/MC/X86/align-branch-64.s
===================================================================
--- /dev/null
+++ llvm/test/MC/X86/align-branch-64.s
@@ -0,0 +1,141 @@
+  # RUN: llvm-mc -filetype=obj -triple x86_64-pc-linux-gnu %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s
+
+  # instruction sizes for reference:
+  #  callq is 5 bytes long
+  #  push %rax is 1 byte
+  #  jmp <near-label> is 2 bytes
+  #  jmp <far-label> is 5 bytes
+  #  ret N is 3 bytes
+
+  # These tests are checking the edge cases on the alignment computation
+
+  .text
+  # CHECK: test1:
+  # CHECK: 20: callq
+  .globl  test1
+  .p2align  5
+test1:
+  .rept 29
+  push %rax
+  .endr
+  .boundary_align 5
+  callq bar
+  .boundary_align_end
+
+  # CHECK: test2:
+  # CHECK: 60: callq
+  .globl  test2
+  .p2align  5
+test2:
+  .rept 31
+  push %rax
+  .endr
+  .boundary_align 5
+  callq bar
+  .boundary_align_end
+
+  # CHECK: test3:
+  # CHECK: a0: callq
+  .globl  test3
+  .p2align  5
+test3:
+  .rept 27
+  push %rax
+  .endr
+  .boundary_align 5
+  callq bar
+  .boundary_align_end
+
+  # next couple check instruction type coverage
+
+  # CHECK: test_jmp:
+  # CHECK: e0: jmp
+  .globl  test_jmp
+  .p2align  5
+test_jmp:
+  .rept 31
+  push %rax
+  .endr
+  .boundary_align 5
+  jmp bar
+  .boundary_align_end
+
+  # CHECK: test_ret:
+  # CHECK: 120: retq
+  .globl  test_ret
+  .p2align  5
+test_ret:
+  .rept 31
+  push %rax
+  .endr
+  .boundary_align 5
+  retq $0
+  .boundary_align_end
+
+  # check a case with a relaxable instruction
+  
+  # CHECK: test_jmp_far:
+  # CHECK: 160: jmp
+  .globl  test_jmp_far
+  .p2align  5
+test_jmp_far:
+  .rept 31
+  push %rax
+  .endr
+  .boundary_align 5
+  jmp baz
+  .boundary_align_end
+
+  # a local target (1 byte immediate)
+  # This is placed in the middle so that it's local for all tests
+  .type   bar,@function
+bar:
+  retq
+
+  # next batch of tests include multiple instructions within the region which
+  # needs to not cross the boundary.
+
+  # CHECK: test_fused_jcc1:
+  # CHECK: 1a0: test
+  .globl  test_fused_jcc1
+  .p2align  5
+test_fused_jcc1:
+  .rept 31
+  push %rax
+  .endr
+  .boundary_align 5
+  testq %rax, %rax
+  jnz bar
+  .boundary_align_end
+
+  # CHECK: test_fused_jcc2:
+  # CHECK: 1e0: test
+  .globl  test_fused_jcc2
+  .p2align  5
+test_fused_jcc2:
+  .rept 28
+  push %rax
+  .endr
+  .boundary_align 5
+  testq %rax, %rax
+  jnz bar
+  .boundary_align_end
+
+  # CHECK: test_fused_jcc3:
+  # CHECK: 220: test
+  .globl  test_fused_jcc3
+  .p2align  5
+test_fused_jcc3:
+  .rept 23
+  push %rax
+  .endr
+  .boundary_align 5
+  testq %rax, %rax
+  jnz baz
+  .boundary_align_end
+
+  # a far target (4 byte imm)
+  .section "unknown"
+  .type   baz,@function
+baz:
+  retq