Index: include/llvm/Target/TargetLowering.h
===================================================================
--- include/llvm/Target/TargetLowering.h
+++ include/llvm/Target/TargetLowering.h
@@ -1546,6 +1546,12 @@
     return AtomicExpansionKind::None;
   }
 
+  /// Returns true if the target supports lock-free atomic operations of the
+  /// given width and alignment (both in bytes).
+  virtual bool hasBuiltinAtomic(uint64_t Size, uint64_t Align) const {
+    return Align >= Size && Size <= getMaxAtomicSizeInBitsSupported() / 8;
+  }
+
   /// On some platforms, an AtomicRMW that never actually modifies the value
   /// (such as fetch_add of 0) can be turned into a fence followed by an
   /// atomic load. This may sound useless, but it makes it possible for the
Index: lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- lib/CodeGen/AtomicExpandPass.cpp
+++ lib/CodeGen/AtomicExpandPass.cpp
@@ -163,7 +163,7 @@
 bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
   unsigned Size = getAtomicOpSize(I);
   unsigned Align = getAtomicOpAlign(I);
-  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
+  return TLI->hasBuiltinAtomic(Size, Align);
 }
 
 } // end anonymous namespace
Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4069,8 +4069,8 @@
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
 
-  if (I.getAlignment() < VT.getSizeInBits() / 8)
-    report_fatal_error("Cannot generate unaligned atomic load");
+  if (!TLI.hasBuiltinAtomic(VT.getSizeInBits() / 8, I.getAlignment()))
+    report_fatal_error("Cannot generate atomic load for this instruction");
 
   MachineMemOperand *MMO =
       DAG.getMachineFunction().
@@ -4105,8 +4105,8 @@
   EVT VT = TLI.getValueType(DAG.getDataLayout(),
                             I.getValueOperand()->getType());
 
-  if (I.getAlignment() < VT.getSizeInBits() / 8)
-    report_fatal_error("Cannot generate unaligned atomic store");
+  if (!TLI.hasBuiltinAtomic(VT.getSizeInBits() / 8, I.getAlignment()))
+    report_fatal_error("Cannot generate atomic store for this instruction");
 
   SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -1263,6 +1263,7 @@
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
 
+    bool hasBuiltinAtomic(uint64_t Size, uint64_t Align) const override;
     LoadInst *
     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -472,6 +472,7 @@
   if (Subtarget.hasCmpxchg16b()) {
     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
   }
+  setMaxAtomicSizeInBitsSupported(128);
 
   // FIXME - use subtarget debug flags
   if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
@@ -22837,6 +22838,10 @@
   }
 }
 
+bool X86TargetLowering::hasBuiltinAtomic(uint64_t Size, uint64_t Align) const {
+  return Size <= getMaxAtomicSizeInBitsSupported() / 8;
+}
+
 LoadInst *
 X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
   unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
Index: test/Transforms/AtomicExpand/X86/expand-unaligned-expansion.ll
===================================================================
--- test/Transforms/AtomicExpand/X86/expand-unaligned-expansion.ll
+++ test/Transforms/AtomicExpand/X86/expand-unaligned-expansion.ll
@@ -0,0 +1,79 @@
+; RUN: opt -S %s -atomic-expand -mtriple=x86_64-linux-gnu | FileCheck %s
+; RUN: opt -S %s -atomic-expand -mtriple=x86_64-linux-gnu -mattr=cx16 | FileCheck %s --check-prefix=CHECK-CX16
+; x86-64 supports unaligned atomic load/store for primitive integer types.
+; Check that unaligned atomic loads/stores are properly expanded.
+
+define i16 @test_unaligned_load_i16(i16* %ptr) {
+; CHECK-LABEL: @test_unaligned_load_i16
+; CHECK: %t0 = load atomic i16, i16* %ptr monotonic, align 1
+; CHECK: ret i16 %t0
+  %t0 = load atomic i16, i16* %ptr monotonic, align 1
+  ret i16 %t0
+}
+
+define i32 @test_unaligned_load_i32(i32* %ptr) {
+; CHECK-LABEL: @test_unaligned_load_i32
+; CHECK: %t0 = load atomic i32, i32* %ptr monotonic, align 2
+; CHECK: ret i32 %t0
+  %t0 = load atomic i32, i32* %ptr monotonic, align 2
+  ret i32 %t0
+}
+
+define i64 @test_unaligned_load_i64(i64* %ptr) {
+; CHECK-LABEL: @test_unaligned_load_i64
+; CHECK: %t0 = load atomic i64, i64* %ptr monotonic, align 4
+; CHECK: ret i64 %t0
+  %t0 = load atomic i64, i64* %ptr monotonic, align 4
+  ret i64 %t0
+}
+
+define i128 @test_unaligned_load_i128(i128* %ptr) {
+; CHECK-LABEL: @test_unaligned_load_i128
+; CHECK: %t0 = load atomic i128, i128* %ptr monotonic, align 8
+; CHECK: ret i128 %t0
+; CHECK-CX16: %[[XCHG:.*]] = cmpxchg i128* %ptr, i128 0, i128 0 monotonic monotonic
+; CHECK-CX16: %[[EXTR:.*]] = extractvalue { i128, i1 } %[[XCHG]], 0
+; CHECK-CX16: ret i128 %[[EXTR]]
+  %t0 = load atomic i128, i128* %ptr monotonic, align 8
+  ret i128 %t0
+}
+
+define void @test_unaligned_store_i16(i16 %val, i16* %ptr) {
+; CHECK-LABEL: @test_unaligned_store_i16
+; CHECK: store atomic i16 %val, i16* %ptr monotonic, align 1
+; CHECK: ret void
+  store atomic i16 %val, i16* %ptr monotonic, align 1
+  ret void
+}
+
+define void @test_unaligned_store_i32(i32 %val, i32* %ptr) {
+; CHECK-LABEL: @test_unaligned_store_i32
+; CHECK: store atomic i32 %val, i32* %ptr monotonic, align 2
+; CHECK: ret void
+  store atomic i32 %val, i32* %ptr monotonic, align 2
+  ret void
+}
+
+define void @test_unaligned_store_i64(i64 %val, i64* %ptr) {
+; CHECK-LABEL: @test_unaligned_store_i64
+; CHECK: store atomic i64 %val, i64* %ptr monotonic, align 4
+; CHECK: ret void
+  store atomic i64 %val, i64* %ptr monotonic, align 4
+  ret void
+}
+
+define void @test_unaligned_store_i128(i128 %val, i128* %ptr) {
+; CHECK-LABEL: @test_unaligned_store_i128
+; CHECK: store atomic i128 %val, i128* %ptr monotonic, align 4
+; CHECK: ret void
+; CHECK-CX16: %[[LOAD:.*]] = load i128, i128* %ptr, align 16
+; CHECK-CX16: br label %atomicrmw.start
+; CHECK-CX16: atomicrmw.start:
+; CHECK-CX16: %loaded = phi i128 [ %[[LOAD]], %0 ], [ %newloaded, %atomicrmw.start ]
+; CHECK-CX16: %[[XCHG:.*]] = cmpxchg i128* %ptr, i128 %loaded, i128 %val monotonic monotonic
+; CHECK-CX16: %[[SUCC:.*]] = extractvalue { i128, i1 } %[[XCHG]], 1
+; CHECK-CX16: %newloaded = extractvalue { i128, i1 } %[[XCHG]], 0
+; CHECK-CX16: br i1 %[[SUCC]], label %atomicrmw.end, label %atomicrmw.start
+  store atomic i128 %val, i128* %ptr monotonic, align 4
+  ret void
+}
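
For reference (not part of the patch): a minimal standalone C++ sketch of the semantics introduced above, assuming the check logic is lifted out of the LLVM classes into hypothetical free functions. It contrasts the generic default of TargetLowering::hasBuiltinAtomic (natural alignment plus supported width) with the X86 override from the patch (width only, since unaligned or oversized accesses are handled by expansion).

    // Illustrative sketch only; hypothetical free functions mirroring the
    // patch, not the real LLVM API.
    #include <cstdint>
    #include <cstdio>

    // Default rule: the access must be naturally aligned and no wider than
    // the maximum atomic width the target reports (sizes in bytes).
    static bool defaultHasBuiltinAtomic(uint64_t Size, uint64_t Align,
                                        uint64_t MaxAtomicSizeInBits) {
      return Align >= Size && Size <= MaxAtomicSizeInBits / 8;
    }

    // X86-style rule from the patch: only the width is checked, so unaligned
    // accesses of supported sizes are still treated as builtin.
    static bool x86HasBuiltinAtomic(uint64_t Size, uint64_t /*Align*/,
                                    uint64_t MaxAtomicSizeInBits) {
      return Size <= MaxAtomicSizeInBits / 8;
    }

    int main() {
      const uint64_t MaxBits = 128; // mirrors setMaxAtomicSizeInBitsSupported(128)
      // A 4-byte access aligned to 2: rejected by the default rule,
      // accepted by the X86 override (as in test_unaligned_load_i32).
      std::printf("default: %d, x86: %d\n",
                  defaultHasBuiltinAtomic(4, 2, MaxBits),
                  x86HasBuiltinAtomic(4, 2, MaxBits));
      return 0;
    }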