Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -41,7 +41,9 @@
       assert(lvalue.isSimple());
 
       AtomicTy = lvalue.getType();
-      ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
+      ValueTy = AtomicTy->isAtomicType()
+                    ? AtomicTy->castAs<AtomicType>()->getValueType()
+                    : AtomicTy;
       EvaluationKind = CGF.getEvaluationKind(ValueTy);
 
       ASTContext &C = CGF.getContext();
@@ -66,7 +68,8 @@
 
       UseLibcall =
           (AtomicSizeInBits > uint64_t(C.toBits(lvalue.getAlignment())) ||
-           AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth());
+           AtomicSizeInBits > C.getTargetInfo().getMaxAtomicInlineWidth() ||
+           !C.toCharUnitsFromBits(AtomicSizeInBits).isPowerOfTwo());
     }
 
     QualType getAtomicType() const { return AtomicTy; }
@@ -74,7 +77,7 @@
     CharUnits getAtomicAlignment() const { return AtomicAlign; }
     CharUnits getValueAlignment() const { return ValueAlign; }
     uint64_t getAtomicSizeInBits() const { return AtomicSizeInBits; }
-    uint64_t getValueSizeInBits() const { return AtomicSizeInBits; }
+    uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
     TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
     bool shouldUseLibcall() const { return UseLibcall; }
 
@@ -968,10 +971,12 @@
       result = EmitFromMemory(result, valueType);
     } else if (isa<llvm::PointerType>(resultTy)) {
       result = Builder.CreateIntToPtr(result, resultTy);
-    } else {
+    } else if (llvm::CastInst::isBitCastable(result->getType(), resultTy)) {
       result = Builder.CreateBitCast(result, resultTy);
-    }
-    return RValue::get(result);
+    } else
+      resultTy = nullptr;
+    if (resultTy)
+      return RValue::get(result);
   }
 
   // Create a temporary. This needs to be big enough to hold the
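
A note on the new UseLibcall condition: this patch starts routing plain, non-_Atomic lvalues from '#pragma omp atomic' through AtomicInfo (hence the isAtomicType() ternary above), and such lvalues keep their natural size, while LLVM's inline atomic loads and stores only accept power-of-two widths. A hypothetical illustration, not taken from this patch, of an object that only the new isPowerOfTwo() check diverts to the libcall path:

  // Hypothetical example (x86-64, max inline atomic width 16 bytes): the
  // over-aligned 3-byte object passes the two existing checks (24 bits <=
  // 128 bits of lvalue alignment, and 24 bits <= the 128-bit inline
  // limit), but 3 bytes is not a power of two, so shouldUseLibcall() now
  // returns true rather than letting codegen emit an invalid
  // non-power-of-two atomic load.
  struct Odd { char bytes[3]; };
  struct Odd odd __attribute__((aligned(16)));
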
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -94,7 +94,11 @@
   // kmp_int32 num_threads);
   OMPRTL__kmpc_push_num_threads,
   // Call to void __kmpc_flush(ident_t *loc, ...);
-  OMPRTL__kmpc_flush
+  OMPRTL__kmpc_flush,
+  // Call to void __kmpc_atomic_start(void);
+  OMPRTL__kmpc_atomic_start,
+  // Call to void __kmpc_atomic_end(void);
+  OMPRTL__kmpc_atomic_end
 };
 
 private:
@@ -335,6 +339,10 @@
   /// \param Vars List of variables to flush.
   virtual void EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
                             SourceLocation Loc);
+  /// \brief Emit start of the atomic region.
+  virtual void EmitOMPAtomicStart(CodeGenFunction &CGF);
+  /// \brief Emit end of the atomic region.
+  virtual void EmitOMPAtomicEnd(CodeGenFunction &CGF);
 };
 } // namespace CodeGen
 } // namespace clang
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -371,6 +371,20 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_flush");
     break;
   }
+  case OMPRTL__kmpc_atomic_start: {
+    // Build void __kmpc_atomic_start(void);
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_atomic_start");
+    break;
+  }
+  case OMPRTL__kmpc_atomic_end: {
+    // Build void __kmpc_atomic_end(void);
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_atomic_end");
+    break;
+  }
   }
   return RTLFn;
 }
@@ -670,3 +684,15 @@
                            CGOpenMPRuntime::OMPRTL__kmpc_flush);
   CGF.EmitRuntimeCall(RTLFn, Args);
 }
+
+void CGOpenMPRuntime::EmitOMPAtomicStart(CodeGenFunction &CGF) {
+  auto *RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction(
+      CGOpenMPRuntime::OMPRTL__kmpc_atomic_start);
+  CGF.EmitRuntimeCall(RTLFn, llvm::None);
+}
+
+void CGOpenMPRuntime::EmitOMPAtomicEnd(CodeGenFunction &CGF) {
+  auto *RTLFn = CGF.CGM.getOpenMPRuntime().CreateRuntimeFunction(
+      CGOpenMPRuntime::OMPRTL__kmpc_atomic_end);
+  CGF.EmitRuntimeCall(RTLFn, llvm::None);
+}
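
The two new entry points guard accesses that cannot be lowered as hardware atomics at all; EmitOMPAtomicExpr in CGStmtOpenMP.cpp below brackets a plain load of a non-simple lvalue (bit-field, vector element, and so on) with them so that the runtime can serialize the access:

  // Excerpt from EmitOMPAtomicExpr below: non-simple lvalues fall back to
  // __kmpc_atomic_start(), an ordinary load, then __kmpc_atomic_end().
  auto &OMPRuntime = CGF.CGM.getOpenMPRuntime();
  OMPRuntime.EmitOMPAtomicStart(CGF);
  Res = CGF.EmitLoadOfLValue(XLValue, Loc);
  OMPRuntime.EmitOMPAtomicEnd(CGF);
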
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -560,8 +560,138 @@
   llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
 }
 
-void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &) {
-  llvm_unreachable("CodeGen for 'omp atomic' is not supported yet.");
+static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
+                              bool IsSeqCst, const Expr *X, const Expr *V,
+                              const Expr *, SourceLocation Loc) {
+  switch (Kind) {
+  case OMPC_read: {
+    // v = x;
+    assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
+    assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
+    LValue XLValue = CGF.EmitLValue(X);
+    LValue VLValue = CGF.EmitLValue(V);
+    RValue Res;
+    if (XLValue.isSimple())
+      Res = CGF.EmitAtomicLoad(XLValue, X->getExprLoc());
+    else {
+      auto &OMPRuntime = CGF.CGM.getOpenMPRuntime();
+      OMPRuntime.EmitOMPAtomicStart(CGF);
+      Res = CGF.EmitLoadOfLValue(XLValue, Loc);
+      OMPRuntime.EmitOMPAtomicEnd(CGF);
+    }
+    // OpenMP, 2.12.6, atomic Construct
+    // Any atomic construct with a seq_cst clause forces the atomically
+    // performed operation to include an implicit flush operation without a
+    // list.
+    if (IsSeqCst)
+      CGF.CGM.getOpenMPRuntime().EmitOMPFlush(CGF, llvm::None, Loc);
+    switch (CGF.getEvaluationKind(V->getType())) {
+    case TEK_Scalar: {
+      llvm::Value *ScalarVal;
+      switch (CGF.getEvaluationKind(X->getType())) {
+      case TEK_Scalar:
+        ScalarVal = CGF.EmitScalarConversion(Res.getScalarVal(), X->getType(),
+                                             V->getType());
+        break;
+      case TEK_Complex:
+        ScalarVal = CGF.EmitComplexToScalarConversion(
+            Res.getComplexVal(), X->getType(), V->getType());
+        break;
+      case TEK_Aggregate:
+        llvm_unreachable("Must be a scalar or complex.");
+      }
+      CGF.EmitStoreOfScalar(ScalarVal, VLValue);
+      break;
+    }
+    case TEK_Complex: {
+      CodeGenFunction::ComplexPairTy ComplexVal;
+      switch (CGF.getEvaluationKind(X->getType())) {
+      case TEK_Scalar: {
+        // Convert the input element to the element type of the complex.
+        auto DestType = V->getType()
+                            .getCanonicalType()
+                            ->castAs<ComplexType>()
+                            ->getElementType();
+        auto Val = CGF.EmitScalarConversion(Res.getScalarVal(), X->getType(),
+                                            DestType);
+        ComplexVal = CodeGenFunction::ComplexPairTy(
+            Val, llvm::Constant::getNullValue(Val->getType()));
+        break;
+      }
+      case TEK_Complex: {
+        auto SrcType = X->getType()
+                           .getCanonicalType()
+                           ->castAs<ComplexType>()
+                           ->getElementType();
+        auto DestType = V->getType()
+                            .getCanonicalType()
+                            ->castAs<ComplexType>()
+                            ->getElementType();
+        ComplexVal.first = CGF.EmitScalarConversion(Res.getComplexVal().first,
+                                                    SrcType, DestType);
+        ComplexVal.second = CGF.EmitScalarConversion(Res.getComplexVal().second,
+                                                     SrcType, DestType);
+
+        break;
+      }
+      case TEK_Aggregate:
+        llvm_unreachable("Must be a scalar or complex.");
+      }
+      CGF.EmitStoreOfComplex(ComplexVal, VLValue, /*isInit=*/false);
+      break;
+    }
+    case TEK_Aggregate:
+      llvm_unreachable("Must be a scalar or complex.");
+    }
+    break;
+  }
+  case OMPC_write:
+  case OMPC_update:
+  case OMPC_capture:
+    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
+    break;
+  case OMPC_if:
+  case OMPC_final:
+  case OMPC_num_threads:
+  case OMPC_private:
+  case OMPC_firstprivate:
+  case OMPC_lastprivate:
+  case OMPC_reduction:
+  case OMPC_safelen:
+  case OMPC_collapse:
+  case OMPC_default:
+  case OMPC_seq_cst:
+  case OMPC_shared:
+  case OMPC_linear:
+  case OMPC_aligned:
+  case OMPC_copyin:
+  case OMPC_copyprivate:
+  case OMPC_flush:
+  case OMPC_proc_bind:
+  case OMPC_schedule:
+  case OMPC_ordered:
+  case OMPC_nowait:
+  case OMPC_untied:
+  case OMPC_threadprivate:
+  case OMPC_mergeable:
+  case OMPC_unknown:
+    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
+    break;
+  }
+}
+
+void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
+  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
+  OpenMPClauseKind Kind = OMPC_unknown;
+  for (auto *C : S.clauses()) {
+    // Find first clause (skip seq_cst clause, if it is first).
+    if (C->getClauseKind() != OMPC_seq_cst) {
+      Kind = C->getClauseKind();
+      break;
+    }
+  }
+  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
+                    S.getLocStart());
 }
 
 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
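
The clause scan in EmitOMPAtomicDirective treats seq_cst as an orthogonal modifier: the first clause other than seq_cst selects the operation, so every spelling below reaches EmitOMPAtomicExpr with Kind == OMPC_read and IsSeqCst set, and each one is exercised by the test file that follows (v and x stand in for any two compatible scalars):

  // All four clause orders are equivalent for codegen purposes.
  #pragma omp atomic seq_cst read
  v = x;
  #pragma omp atomic read seq_cst
  v = x;
  #pragma omp atomic read, seq_cst
  v = x;
  #pragma omp atomic seq_cst, read
  v = x;
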
Index: test/OpenMP/atomic_read_codegen.c
===================================================================
--- test/OpenMP/atomic_read_codegen.c
+++ test/OpenMP/atomic_read_codegen.c
@@ -0,0 +1,215 @@
+// RUN: %clang_cc1 -verify -fopenmp=libiomp5 -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-unknown-unknown -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+_Bool bv, bx;
+char cv, cx;
+unsigned char ucv, ucx;
+short sv, sx;
+unsigned short usv, usx;
+int iv, ix;
+unsigned int uiv, uix;
+long lv, lx;
+unsigned long ulv, ulx;
+long long llv, llx;
+unsigned long long ullv, ullx;
+float fv, fx;
+double dv, dx;
+long double ldv, ldx;
+_Complex int civ, cix;
+_Complex float cfv, cfx;
+_Complex double cdv, cdx;
+
+typedef int v4si __attribute__((__vector_size__(16)));
+v4si v4six;
+
+struct BitFields {
+  int a : 3;
+} bfx;
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+float2 float2x;
+
+register int rix __asm__("0");
+
+int main() {
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = cx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  ucv = ucx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = sx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = usx;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  iv = ix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = uix;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  lv = lx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = llx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ullx;
+// CHECK: load atomic i32*
+// CHECK: store float
+#pragma omp atomic read
+  fv = fx;
+// CHECK: load atomic i64*
+// CHECK: store double
+#pragma omp atomic read
+  dv = dx;
+// CHECK: load atomic i128*
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = cix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = cfx;
+// CHECK: call{{.*}} void @__atomic_load(i64 16,
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic seq_cst read
+  cdv = cdx;
+// CHECK: load atomic i64*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = ulx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = bx;
+// CHECK: load atomic i8*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i8
+#pragma omp atomic read, seq_cst
+  ucv = cx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = lx;
+// CHECK: load atomic i32*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i32
+#pragma omp atomic seq_cst, read
+  iv = uix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = ix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i64
+#pragma omp atomic read
+  lv = cix;
+// CHECK: load atomic i32*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = fx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = dx;
+// CHECK: load atomic i128*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+#pragma omp atomic read
+  fv = cix;
+// CHECK: load atomic i16*
+// CHECK: store double
+#pragma omp atomic read
+  dv = sx;
+// CHECK: load atomic i8*
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = bx;
+// CHECK: load atomic i16*
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = usx;
+// CHECK: load atomic i64*
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic read
+  cdv = llx;
+// CHECK: call{{.*}} void @__kmpc_atomic_start(
+// CHECK: load <4 x i32>*
+// CHECK: extractelement
+// CHECK: call{{.*}} void @__kmpc_atomic_end(
+// CHECK: store i8
+#pragma omp atomic read
+  bv = v4six[0];
+// CHECK: call{{.*}} void @__kmpc_atomic_start(
+// CHECK: load i
+// CHECK: call{{.*}} void @__kmpc_atomic_end(
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx.a;
+// CHECK: call{{.*}} void @__kmpc_atomic_start(
+// CHECK: load <2 x float>*
+// CHECK: extractelement
+// CHECK: call{{.*}} void @__kmpc_atomic_end(
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = float2x.x;
+// CHECK: call{{.*}} void @__kmpc_atomic_start(
+// CHECK: call{{.*}} i{{[0-9]+}} @llvm.read_register
+// CHECK: call{{.*}} void @__kmpc_atomic_end(
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+#pragma omp atomic read seq_cst
+  dv = rix;
+  return 0;
+}
+
+#endif