Index: include/clang/Basic/TargetInfo.h
===================================================================
--- include/clang/Basic/TargetInfo.h
+++ include/clang/Basic/TargetInfo.h
@@ -370,6 +370,21 @@
   /// \brief Return the maximum width lock-free atomic operation which can be
   /// inlined given the supported features of the given target.
   unsigned getMaxAtomicInlineWidth() const { return MaxAtomicInlineWidth; }
+  /// \brief Returns true if the given target supports lock-free atomic
+  /// operations at the specified width and alignment.
+  ///
+  /// The default implementation accepts a width only if it is no larger than
+  /// the alignment and getMaxAtomicInlineWidth(), and is either at most one
+  /// char wide or a power-of-two number of chars.
+  virtual bool hasBuiltinAtomic(uint64_t AtomicSizeInBits,
+                                uint64_t AlignmentInBits) const {
+    // Widths below one char (including zero) divide down to zero chars,
+    // which is not a power of two, so accept them explicitly.
+    return AtomicSizeInBits <= AlignmentInBits &&
+           AtomicSizeInBits <= getMaxAtomicInlineWidth() &&
+           (AtomicSizeInBits <= getCharWidth() ||
+            llvm::isPowerOf2_64(AtomicSizeInBits / getCharWidth()));
+  }
 
   /// \brief Return the maximum vector alignment supported for the given target.
   unsigned getMaxVectorAlign() const { return MaxVectorAlign; }
Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -64,9 +64,8 @@
       if (lvalue.getAlignment().isZero())
         lvalue.setAlignment(AtomicAlign);
 
-      UseLibcall =
-        (AtomicSizeInBits > uint64_t(C.toBits(lvalue.getAlignment())) ||
-         AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth());
+      UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
+          AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
     }
 
     QualType getAtomicType() const { return AtomicTy; }
@@ -74,7 +73,7 @@
     CharUnits getAtomicAlignment() const { return AtomicAlign; }
     CharUnits getValueAlignment() const { return ValueAlign; }
     uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
-    uint64_t getValueSizeInBits() const { return AtomicSizeInBits; }
+    uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
     TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
     bool shouldUseLibcall() const { return UseLibcall; }
 
@@ -965,13 +964,11 @@
     llvm::Type *resultTy = CGM.getTypes().ConvertTypeForMem(valueType);
     if (isa<llvm::IntegerType>(resultTy)) {
       assert(result->getType() == resultTy);
-      result = EmitFromMemory(result, valueType);
-    } else if (isa<llvm::PointerType>(resultTy)) {
-      result = Builder.CreateIntToPtr(result, resultTy);
-    } else {
-      result = Builder.CreateBitCast(result, resultTy);
-    }
-    return RValue::get(result);
+      return RValue::get(EmitFromMemory(result, valueType));
+    } else if (isa<llvm::PointerType>(resultTy))
+      return RValue::get(Builder.CreateIntToPtr(result, resultTy));
+    else if (llvm::CastInst::isBitCastable(result->getType(), resultTy))
+      return RValue::get(Builder.CreateBitCast(result, resultTy));
   }
 
   // Create a temporary.  This needs to be big enough to hold the
@@ -1088,7 +1085,7 @@
   }
 
   // Okay, we're doing this natively.
-  llvm::Value *intValue;
+  llvm::Value *intValue = nullptr;
 
   // If we've got a scalar value of the right size, try to avoid going
   // through memory.
@@ -1099,15 +1096,16 @@
     } else {
       llvm::IntegerType *inputIntTy =
         llvm::IntegerType::get(getLLVMContext(), atomics.getValueSizeInBits());
-      if (isa<llvm::PointerType>(value->getType())) {
+      // Only convert in registers when the cast is legal; otherwise intValue
+      // stays null and the value is routed through memory below.
+      if (isa<llvm::PointerType>(value->getType()))
         intValue = Builder.CreatePtrToInt(value, inputIntTy);
-      } else {
+      else if (llvm::CastInst::isBitCastable(value->getType(), inputIntTy))
         intValue = Builder.CreateBitCast(value, inputIntTy);
-      }
     }
-
+  }
   // Otherwise, we need to go through memory.
-  } else {
+  if (!intValue) {
     // Put the r-value in memory.
     llvm::Value *addr = atomics.materializeRValue(rvalue);
 
Index: test/CodeGen/x86_64-atomic-long_double.c
===================================================================
--- test/CodeGen/x86_64-atomic-long_double.c
+++ test/CodeGen/x86_64-atomic-long_double.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -triple x86_64-linux-gnu -target-cpu core2 %s -S -emit-llvm -o - | FileCheck %s
+
+// _Atomic long double is accessed atomically as i128 while its value type is
+// x86_fp80. The two differ in bit width, so they cannot be bitcast into one
+// another and the conversion is expected to go through memory.
+
+// Atomic load: the i128 result is stored through an i128* view of an
+// x86_fp80 slot and then re-read as x86_fp80.
+long double test_load(_Atomic long double *addr) {
+  // CHECK-LABEL: @test_load
+  // CHECK: bitcast x86_fp80* %{{.+}} to i128*
+  // CHECK: load atomic i128*
+  // CHECK: bitcast x86_fp80* %{{.+}} to i128*
+  // CHECK: store i128 %{{.+}}, i128*
+  // CHECK: load x86_fp80*
+  // CHECK: ret x86_fp80
+  return *addr;
+}
+
+// Atomic store: the value is read back from memory as i128 and that
+// integer is stored atomically.
+void test_store(_Atomic long double *addr, long double val) {
+  // CHECK-LABEL: @test_store
+  // CHECK: bitcast x86_fp80* %{{.+}} to i128*
+  // CHECK: load i128*
+  // CHECK: bitcast x86_fp80* %{{.+}} to i128*
+  // CHECK: store atomic i128 %{{.+}}, i128* {{.+}}
+  *addr = val;
+}