Index: lib/CodeGen/CGAtomic.cpp
===================================================================
--- lib/CodeGen/CGAtomic.cpp
+++ lib/CodeGen/CGAtomic.cpp
@@ -36,36 +36,51 @@
     CharUnits LValueAlign;
     TypeEvaluationKind EvaluationKind;
     bool UseLibcall;
+    llvm::Value *Addr;

   public:
-    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue) : CGF(CGF) {
-      assert(lvalue.isSimple());
-
-      AtomicTy = lvalue.getType();
-      ValueTy = AtomicTy->castAs<AtomicType>()->getValueType();
-      EvaluationKind = CGF.getEvaluationKind(ValueTy);
-
-      ASTContext &C = CGF.getContext();
-
-      uint64_t ValueAlignInBits;
-      uint64_t AtomicAlignInBits;
-      TypeInfo ValueTI = C.getTypeInfo(ValueTy);
-      ValueSizeInBits = ValueTI.Width;
-      ValueAlignInBits = ValueTI.Align;
-
-      TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
-      AtomicSizeInBits = AtomicTI.Width;
-      AtomicAlignInBits = AtomicTI.Align;
-
-      assert(ValueSizeInBits <= AtomicSizeInBits);
-      assert(ValueAlignInBits <= AtomicAlignInBits);
-
-      AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
-      ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
-      if (lvalue.getAlignment().isZero())
-        lvalue.setAlignment(AtomicAlign);
-
-      UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
-          AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
+    AtomicInfo(CodeGenFunction &CGF, LValue &lvalue)
+        : CGF(CGF), AtomicSizeInBits(0), ValueSizeInBits(0), UseLibcall(true),
+          Addr(nullptr) {
+      assert(!lvalue.isGlobalReg() && "Global registers are not supported!");
+      if (lvalue.isSimple()) {
+        Addr = lvalue.getAddress();
+        AtomicTy = lvalue.getType();
+        if (auto *ATy = AtomicTy->getAs<AtomicType>())
+          ValueTy = ATy->getValueType();
+        else
+          ValueTy = AtomicTy;
+        EvaluationKind = CGF.getEvaluationKind(ValueTy);
+
+        ASTContext &C = CGF.getContext();
+
+        uint64_t ValueAlignInBits;
+        uint64_t AtomicAlignInBits;
+        TypeInfo ValueTI = C.getTypeInfo(ValueTy);
+        ValueSizeInBits = ValueTI.Width;
+        ValueAlignInBits = ValueTI.Align;
+
+        TypeInfo AtomicTI = C.getTypeInfo(AtomicTy);
+        AtomicSizeInBits = AtomicTI.Width;
+        AtomicAlignInBits = AtomicTI.Align;
+
+        assert(ValueSizeInBits <= AtomicSizeInBits);
+        assert(ValueAlignInBits <= AtomicAlignInBits);
+
+        AtomicAlign = C.toCharUnitsFromBits(AtomicAlignInBits);
+        ValueAlign = C.toCharUnitsFromBits(ValueAlignInBits);
+        if (lvalue.getAlignment().isZero())
+          lvalue.setAlignment(AtomicAlign);
+
+        UseLibcall = !C.getTargetInfo().hasBuiltinAtomic(
+            AtomicSizeInBits, C.toBits(lvalue.getAlignment()));
+      } else if (lvalue.isBitField())
+        Addr = lvalue.getBitFieldAddr();
+      else if (lvalue.isVectorElt())
+        Addr = lvalue.getVectorAddr();
+      else {
+        assert(lvalue.isExtVectorElt());
+        Addr = lvalue.getExtVectorAddr();
+      }
     }

     QualType getAtomicType() const { return AtomicTy; }
@@ -76,6 +91,7 @@
     uint64_t getValueSizeInBits() const { return ValueSizeInBits; }
     TypeEvaluationKind getEvaluationKind() const { return EvaluationKind; }
     bool shouldUseLibcall() const { return UseLibcall; }
+    llvm::Value *getAddress() const { return Addr; }

     /// Is the atomic size larger than the underlying value type?
     ///
@@ -90,6 +106,10 @@
     bool emitMemSetZeroIfNecessary(LValue dest) const;

     llvm::Value *getAtomicSizeValue() const {
+      if (!AtomicSizeInBits) {
+        auto Ty = Addr->getType()->getPointerElementType();
+        return llvm::ConstantExpr::getSizeOf(Ty);
+      }
       CharUnits size = CGF.getContext().toCharUnitsFromBits(AtomicSizeInBits);
       return CGF.CGM.getSize(size);
     }
@@ -962,18 +982,24 @@
   // Check whether we should use a library call.
   if (atomics.shouldUseLibcall()) {
     llvm::Value *tempAddr;
-    if (!resultSlot.isIgnored()) {
-      assert(atomics.getEvaluationKind() == TEK_Aggregate);
-      tempAddr = resultSlot.getAddr();
+    if (src.isSimple()) {
+      if (!resultSlot.isIgnored()) {
+        assert(atomics.getEvaluationKind() == TEK_Aggregate);
+        tempAddr = resultSlot.getAddr();
+      } else
+        tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
     } else {
-      tempAddr = CreateMemTemp(atomics.getAtomicType(), "atomic-load-temp");
+      auto AllocaTy = atomics.getAddress()->getType()->getPointerElementType();
+      auto TempAlloca = CreateTempAlloca(AllocaTy, "atomic-load-temp");
+      TempAlloca->setAlignment(getContext().toBits(src.getAlignment()));
+      tempAddr = TempAlloca;
     }

     // void __atomic_load(size_t size, void *mem, void *return, int order);
     CallArgList args;
     args.add(RValue::get(atomics.getAtomicSizeValue()),
              getContext().getSizeType());
-    args.add(RValue::get(EmitCastToVoidPtr(src.getAddress())),
+    args.add(RValue::get(EmitCastToVoidPtr(atomics.getAddress())),
              getContext().VoidPtrTy);
     args.add(RValue::get(EmitCastToVoidPtr(tempAddr)),
              getContext().VoidPtrTy);
@@ -983,7 +1009,19 @@
     emitAtomicLibcall(*this, "__atomic_load", getContext().VoidTy, args);

     // Produce the r-value.
-    return atomics.convertTempToRValue(tempAddr, resultSlot, loc);
+    if (src.isSimple())
+      return atomics.convertTempToRValue(tempAddr, resultSlot, loc);
+    else if (src.isBitField())
+      return EmitLoadOfBitfieldLValue(LValue::MakeBitfield(
+          tempAddr, src.getBitFieldInfo(), src.getType(), src.getAlignment()));
+    else if (src.isVectorElt())
+      return EmitLoadOfLValue(
+          LValue::MakeVectorElt(tempAddr, src.getVectorIdx(), src.getType(),
+                                src.getAlignment()),
+          loc);
+    else if (src.isExtVectorElt())
+      return EmitLoadOfExtVectorElementLValue(LValue::MakeExtVectorElt(
+          tempAddr, src.getExtVectorElts(), src.getType(),
+          src.getAlignment()));
   }

   // Okay, we're doing this natively.
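
What the new libcall path does for a non-simple lvalue is worth spelling out: the entire underlying object (the vector, or the bit-field's storage) is copied atomically into a stack temporary via __atomic_load, and the element or bit-field is then extracted from the temporary with ordinary loads. A C++-level sketch of what this is morally equivalent to for the `bv = v4six[0]` case in the test below (the function name is illustrative; this is not the emitted IR):

    typedef int v4si __attribute__((__vector_size__(16)));
    v4si v4six;

    bool atomicReadVectorElt() {
      v4si Temp;  // corresponds to the "atomic-load-temp" alloca
      // Atomically copy the whole 16-byte vector into the temporary;
      // order 5 == __ATOMIC_SEQ_CST, matching the "i32 5" in the test.
      __atomic_load(&v4six, &Temp, __ATOMIC_SEQ_CST);
      // Extract the requested element from the temporary, non-atomically.
      return Temp[0];
    }
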
Index: lib/CodeGen/CGOpenMPRuntime.h
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.h
+++ lib/CodeGen/CGOpenMPRuntime.h
@@ -88,6 +88,10 @@
   OMPRTL__kmpc_master,
   // Call to void __kmpc_end_master(ident_t *, kmp_int32 global_tid);
   OMPRTL__kmpc_end_master,
+  // Call to void __kmpc_atomic_start(void);
+  OMPRTL__kmpc_atomic_start,
+  // Call to void __kmpc_atomic_end(void);
+  OMPRTL__kmpc_atomic_end,
 };

 /// \brief Values for bit flags used in the ident_t to describe the fields.
@@ -395,6 +399,13 @@
   /// \param Vars List of variables to flush.
   virtual void EmitOMPFlush(CodeGenFunction &CGF, ArrayRef<const Expr *> Vars,
                             SourceLocation Loc);
+
+  /// \brief Emits an atomic region.
+  /// \param AtomicOpGen Generator for the statement associated with the given
+  /// atomic region.
+  virtual void EmitOMPAtomicRegion(CodeGenFunction &CGF,
+                                   const std::function<void()> &AtomicOpGen,
+                                   SourceLocation Loc);
 };
 } // namespace CodeGen
 } // namespace clang
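
Both new entry points take no arguments and return nothing, so all a conforming runtime has to provide is mutual exclusion between __kmpc_atomic_start and __kmpc_atomic_end. A minimal sketch of that contract (an illustration only, not libiomp5's actual implementation):

    #include <mutex>

    static std::mutex GlobalAtomicLock;  // hypothetical lock for this sketch

    extern "C" void __kmpc_atomic_start(void) { GlobalAtomicLock.lock(); }
    extern "C" void __kmpc_atomic_end(void) { GlobalAtomicLock.unlock(); }
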
Index: lib/CodeGen/CGOpenMPRuntime.cpp
===================================================================
--- lib/CodeGen/CGOpenMPRuntime.cpp
+++ lib/CodeGen/CGOpenMPRuntime.cpp
@@ -475,6 +475,20 @@
     RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_end_master");
     break;
   }
+  case OMPRTL__kmpc_atomic_start: {
+    // Build void __kmpc_atomic_start(void);
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_atomic_start");
+    break;
+  }
+  case OMPRTL__kmpc_atomic_end: {
+    // Build void __kmpc_atomic_end(void);
+    llvm::FunctionType *FnTy =
+        llvm::FunctionType::get(CGM.VoidTy, /*isVarArg=*/false);
+    RTLFn = CGM.CreateRuntimeFunction(FnTy, /*Name=*/"__kmpc_atomic_end");
+    break;
+  }
   }
   return RTLFn;
 }
@@ -920,3 +934,32 @@
   auto *RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_flush);
   CGF.EmitRuntimeCall(RTLFn, Args);
 }
+
+void CGOpenMPRuntime::EmitOMPAtomicRegion(
+    CodeGenFunction &CGF, const std::function<void()> &AtomicOpGen,
+    SourceLocation) {
+  // __kmpc_atomic_start();
+  // AtomicOpGen();
+  // __kmpc_atomic_end();
+  // Prepare arguments and build a call to __kmpc_atomic_start().
+  auto RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_atomic_start);
+  CGF.EmitRuntimeCall(RTLFn, llvm::None);
+  AtomicOpGen();
+  // Build a call to __kmpc_atomic_end().
+  // OpenMP [1.2.2 OpenMP Language Terminology]
+  // For C/C++, an executable statement, possibly compound, with a single
+  // entry at the top and a single exit at the bottom, or an OpenMP construct.
+  // * Access to the structured block must not be the result of a branch.
+  // * The point of exit cannot be a branch out of the structured block.
+  // * The point of entry must not be a call to setjmp().
+  // * longjmp() and throw() must not violate the entry/exit criteria.
+  // * An expression statement, iteration statement, selection statement, or
+  //   try block is considered to be a structured block if the corresponding
+  //   compound statement obtained by enclosing it in { and } would be a
+  //   structured block.
+  // It is analyzed in Sema, so we can just call __kmpc_atomic_end() on
+  // fallthrough rather than pushing a normal cleanup for it.
+  RTLFn = CreateRuntimeFunction(OMPRTL__kmpc_atomic_end);
+  CGF.EmitRuntimeCall(RTLFn, llvm::None);
+}
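
This hook exists for lvalues that cannot be loaded atomically at all; the only client below is the global-register case of 'omp atomic read'. The generated code is morally equivalent to the following sketch (assuming a lock-based runtime as above; readGuarded is a made-up stand-in for the code the callback emits):

    extern "C" void __kmpc_atomic_start(void);
    extern "C" void __kmpc_atomic_end(void);

    int readGuarded(volatile int *x) {  // x stands in for the register lvalue
      __kmpc_atomic_start();
      int Tmp = *x;  // plain, non-atomic load under the runtime's global lock
      __kmpc_atomic_end();
      return Tmp;    // conversion and store to 'v' happen outside the region
    }
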
Index: lib/CodeGen/CGStmtOpenMP.cpp
===================================================================
--- lib/CodeGen/CGStmtOpenMP.cpp
+++ lib/CodeGen/CGStmtOpenMP.cpp
@@ -691,8 +691,130 @@
   llvm_unreachable("CodeGen for 'omp ordered' is not supported yet.");
 }

-void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &) {
-  llvm_unreachable("CodeGen for 'omp atomic' is not supported yet.");
+static llvm::Value *convertToScalarValue(CodeGenFunction &CGF, RValue Val,
+                                         QualType SrcType, QualType DestType) {
+  assert(CGF.hasScalarEvaluationKind(DestType) &&
+         "DestType must have scalar evaluation kind.");
+  assert(!Val.isAggregate() && "Must be a scalar or complex.");
+  return Val.isScalar()
+             ? CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestType)
+             : CGF.EmitComplexToScalarConversion(Val.getComplexVal(), SrcType,
+                                                 DestType);
+}
+
+static CodeGenFunction::ComplexPairTy
+convertToComplexValue(CodeGenFunction &CGF, RValue Val, QualType SrcType,
+                      QualType DestType) {
+  assert(CGF.getEvaluationKind(DestType) == TEK_Complex &&
+         "DestType must have complex evaluation kind.");
+  CodeGenFunction::ComplexPairTy ComplexVal;
+  if (Val.isScalar()) {
+    // Convert the input element to the element type of the complex.
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    auto ScalarVal =
+        CGF.EmitScalarConversion(Val.getScalarVal(), SrcType, DestElementType);
+    ComplexVal = CodeGenFunction::ComplexPairTy(
+        ScalarVal, llvm::Constant::getNullValue(ScalarVal->getType()));
+  } else {
+    assert(Val.isComplex() && "Must be a scalar or complex.");
+    auto SrcElementType = SrcType->castAs<ComplexType>()->getElementType();
+    auto DestElementType = DestType->castAs<ComplexType>()->getElementType();
+    ComplexVal.first = CGF.EmitScalarConversion(
+        Val.getComplexVal().first, SrcElementType, DestElementType);
+    ComplexVal.second = CGF.EmitScalarConversion(
+        Val.getComplexVal().second, SrcElementType, DestElementType);
+  }
+  return ComplexVal;
+}
+
+static void EmitOMPAtomicReadExpr(CodeGenFunction &CGF, bool IsSeqCst,
+                                  const Expr *X, const Expr *V,
+                                  SourceLocation Loc) {
+  // v = x;
+  assert(V->isLValue() && "V of 'omp atomic read' is not lvalue");
+  assert(X->isLValue() && "X of 'omp atomic read' is not lvalue");
+  LValue XLValue = CGF.EmitLValue(X);
+  LValue VLValue = CGF.EmitLValue(V);
+  RValue Res;
+  if (!XLValue.isGlobalReg())
+    Res = CGF.EmitAtomicLoad(XLValue, Loc);
+  else
+    CGF.CGM.getOpenMPRuntime().EmitOMPAtomicRegion(CGF, [&]() -> void {
+      Res = CGF.EmitLoadOfGlobalRegLValue(XLValue);
+    }, Loc);
+  // OpenMP, 2.12.6, atomic Construct
+  // Any atomic construct with a seq_cst clause forces the atomically
+  // performed operation to include an implicit flush operation without a
+  // list.
+  if (IsSeqCst)
+    CGF.CGM.getOpenMPRuntime().EmitOMPFlush(CGF, llvm::None, Loc);
+  switch (CGF.getEvaluationKind(V->getType())) {
+  case TEK_Scalar:
+    CGF.EmitStoreOfScalar(
+        convertToScalarValue(CGF, Res, X->getType(), V->getType()), VLValue);
+    break;
+  case TEK_Complex:
+    CGF.EmitStoreOfComplex(
+        convertToComplexValue(CGF, Res, X->getType(), V->getType()), VLValue,
+        /*isInit=*/false);
+    break;
+  case TEK_Aggregate:
+    llvm_unreachable("Must be a scalar or complex.");
+  }
+}
+
+static void EmitOMPAtomicExpr(CodeGenFunction &CGF, OpenMPClauseKind Kind,
+                              bool IsSeqCst, const Expr *X, const Expr *V,
+                              const Expr *, SourceLocation Loc) {
+  switch (Kind) {
+  case OMPC_read:
+    EmitOMPAtomicReadExpr(CGF, IsSeqCst, X, V, Loc);
+    break;
+  case OMPC_write:
+  case OMPC_update:
+  case OMPC_capture:
+    llvm_unreachable("CodeGen for 'omp atomic clause' is not supported yet.");
+  case OMPC_if:
+  case OMPC_final:
+  case OMPC_num_threads:
+  case OMPC_private:
+  case OMPC_firstprivate:
+  case OMPC_lastprivate:
+  case OMPC_reduction:
+  case OMPC_safelen:
+  case OMPC_collapse:
+  case OMPC_default:
+  case OMPC_seq_cst:
+  case OMPC_shared:
+  case OMPC_linear:
+  case OMPC_aligned:
+  case OMPC_copyin:
+  case OMPC_copyprivate:
+  case OMPC_flush:
+  case OMPC_proc_bind:
+  case OMPC_schedule:
+  case OMPC_ordered:
+  case OMPC_nowait:
+  case OMPC_untied:
+  case OMPC_threadprivate:
+  case OMPC_mergeable:
+  case OMPC_unknown:
+    llvm_unreachable("Clause is not allowed in 'omp atomic'.");
+  }
+}
+
+void CodeGenFunction::EmitOMPAtomicDirective(const OMPAtomicDirective &S) {
+  bool IsSeqCst = S.getSingleClause(/*K=*/OMPC_seq_cst);
+  OpenMPClauseKind Kind = OMPC_unknown;
+  for (auto *C : S.clauses()) {
+    // Find first clause (skip seq_cst clause, if it is first).
+    if (C->getClauseKind() != OMPC_seq_cst) {
+      Kind = C->getClauseKind();
+      break;
+    }
+  }
+  EmitOMPAtomicExpr(*this, Kind, IsSeqCst, S.getX(), S.getV(), S.getExpr(),
+                    S.getLocStart());
 }

 void CodeGenFunction::EmitOMPTargetDirective(const OMPTargetDirective &) {
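
Note that EmitOMPAtomicDirective picks the first clause other than seq_cst, so clause order is irrelevant. Both spellings below (adapted from the test that follows) reach EmitOMPAtomicReadExpr with IsSeqCst set and therefore emit a __kmpc_flush call:

    extern int iv;
    extern unsigned int uix;

    void clauseOrder() {
    #pragma omp atomic seq_cst, read  // seq_cst first: still an atomic read
      iv = uix;
    #pragma omp atomic read, seq_cst  // kind clause first: identical lowering
      iv = uix;
    }
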
Index: test/OpenMP/atomic_read_codegen.c
===================================================================
--- test/OpenMP/atomic_read_codegen.c
+++ test/OpenMP/atomic_read_codegen.c
@@ -0,0 +1,222 @@
+// RUN: %clang_cc1 -verify -triple x86_64-apple-darwin10 -fopenmp=libiomp5 -x c -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp=libiomp5 -x c -triple x86_64-apple-darwin10 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s
+// expected-no-diagnostics
+
+#ifndef HEADER
+#define HEADER
+
+_Bool bv, bx;
+char cv, cx;
+unsigned char ucv, ucx;
+short sv, sx;
+unsigned short usv, usx;
+int iv, ix;
+unsigned int uiv, uix;
+long lv, lx;
+unsigned long ulv, ulx;
+long long llv, llx;
+unsigned long long ullv, ullx;
+float fv, fx;
+double dv, dx;
+long double ldv, ldx;
+_Complex int civ, cix;
+_Complex float cfv, cfx;
+_Complex double cdv, cdx;
+
+typedef int v4si __attribute__((__vector_size__(16)));
+v4si v4six;
+
+struct BitFields {
+  float f;
+  int a : 31;
+} bfx;
+
+typedef float float2 __attribute__((ext_vector_type(2)));
+float2 float2x;
+
+register int rix __asm__("0");
+
+int main() {
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = cx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  ucv = ucx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = sx;
+// CHECK: load atomic i16*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = usx;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  iv = ix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = uix;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  lv = lx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = llx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ullx;
+// CHECK: load atomic i32* bitcast (float*
+// CHECK: bitcast i32 {{.*}} to float
+// CHECK: store float
+#pragma omp atomic read
+  fv = fx;
+// CHECK: load atomic i64* bitcast (double*
+// CHECK: bitcast i64 {{.*}} to double
+// CHECK: store double
+#pragma omp atomic read
+  dv = dx;
+// CHECK: [[LD:%.+]] = load atomic i128* bitcast (x86_fp80*
+// CHECK: [[BITCAST:%.+]] = bitcast x86_fp80* [[LDTEMP:%.*]] to i128*
+// CHECK: store i128 [[LD]], i128* [[BITCAST]]
+// CHECK: [[LD:%.+]] = load x86_fp80* [[LDTEMP]]
+// CHECK: store x86_fp80 [[LD]]
+#pragma omp atomic read
+  ldv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = cix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = cfx;
+// CHECK: call{{.*}} void @__atomic_load(i64 16,
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic seq_cst read
+  cdv = cdx;
+// CHECK: load atomic i64*
+// CHECK: store i8
+#pragma omp atomic read
+  bv = ulx;
+// CHECK: load atomic i8*
+// CHECK: store i8
+#pragma omp atomic read
+  cv = bx;
+// CHECK: load atomic i8*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i8
+#pragma omp atomic read, seq_cst
+  ucv = cx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  sv = ulx;
+// CHECK: load atomic i64*
+// CHECK: store i16
+#pragma omp atomic read
+  usv = lx;
+// CHECK: load atomic i32*
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store i32
+#pragma omp atomic seq_cst, read
+  iv = uix;
+// CHECK: load atomic i32*
+// CHECK: store i32
+#pragma omp atomic read
+  uiv = ix;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store i64
+#pragma omp atomic read
+  lv = cix;
+// CHECK: load atomic i32*
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = fx;
+// CHECK: load atomic i64*
+// CHECK: store i64
+#pragma omp atomic read
+  llv = dx;
+// CHECK: load atomic i128*
+// CHECK: store i64
+#pragma omp atomic read
+  ullv = ldx;
+// CHECK: call{{.*}} void @__atomic_load(i64 8,
+// CHECK: store float
+#pragma omp atomic read
+  fv = cix;
+// CHECK: load atomic i16*
+// CHECK: store double
+#pragma omp atomic read
+  dv = sx;
+// CHECK: load atomic i8*
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bx;
+// CHECK: load atomic i8*
+// CHECK: store i32
+// CHECK: store i32
+#pragma omp atomic read
+  civ = bx;
+// CHECK: load atomic i16*
+// CHECK: store float
+// CHECK: store float
+#pragma omp atomic read
+  cfv = usx;
+// CHECK: load atomic i64*
+// CHECK: store double
+// CHECK: store double
+#pragma omp atomic read
+  cdv = llx;
+// CHECK: [[BITCAST:%.+]] = bitcast <4 x i32>* [[LDTEMP:%.+]] to i8*
+// CHECK: call{{.*}} void @__atomic_load(i64 ptrtoint (<4 x i32>* getelementptr (<4 x i32>* null, i32 1) to i64), i8* bitcast (<4 x i32>* {{@.+}} to i8*), i8* [[BITCAST]], i32 5)
+// CHECK: [[LD:%.+]] = load <4 x i32>* [[LDTEMP]]
+// CHECK: extractelement <4 x i32> [[LD]]
+// CHECK: store i8
+#pragma omp atomic read
+  bv = v4six[0];
+// CHECK: [[BITCAST:%.+]] = bitcast i32* [[LDTEMP:%.+]] to i8*
+// CHECK: call{{.*}} void @__atomic_load(i64 ptrtoint (i32* getelementptr (i32* null, i32 1) to i64), i8* bitcast (i32* {{.+}} to i8*), i8* [[BITCAST]], i32 5)
+// CHECK: [[LD:%.+]] = load i32* [[LDTEMP]]
+// CHECK: shl i32 [[LD]]
+// CHECK: store x86_fp80
+#pragma omp atomic read
+  ldv = bfx.a;
+// CHECK: [[BITCAST:%.+]] = bitcast <2 x float>* [[LDTEMP:%.+]] to i8*
+// CHECK: call{{.*}} void @__atomic_load(i64 ptrtoint (<2 x float>* getelementptr (<2 x float>* null, i32 1) to i64), i8* bitcast (<2 x float>* {{@.+}} to i8*), i8* [[BITCAST]], i32 5)
+// CHECK: [[LD:%.+]] = load <2 x float>* [[LDTEMP]]
+// CHECK: extractelement <2 x float> [[LD]]
+// CHECK: store i64
+#pragma omp atomic read
+  ulv = float2x.x;
+// CHECK: call{{.*}} void @__kmpc_atomic_start(
+// CHECK: call{{.*}} i{{[0-9]+}} @llvm.read_register
+// CHECK: call{{.*}} void @__kmpc_atomic_end(
+// CHECK: call{{.*}} @__kmpc_flush(
+// CHECK: store double
+#pragma omp atomic read seq_cst
+  dv = rix;
+  return 0;
+}
+
+#endif
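
The test pins down three lowering strategies: native `load atomic` instructions where the target supports them, the generic __atomic_load libcall for 16-byte and non-simple lvalues, and __kmpc_atomic_start/__kmpc_atomic_end bracketing for global register lvalues. The first two can be seen side by side in a minimal translation unit (a sketch; built with -fopenmp=libiomp5 as in the RUN lines above, the declarations mirror the test):

    long double ldx;      // x86_fp80: native wide atomic load (i128)
    _Complex double cdx;  // 16 bytes: goes through the __atomic_load libcall

    void twoStrategies(long double *ldv, _Complex double *cdv) {
    #pragma omp atomic read
      *ldv = ldx;  // CHECK'd above as "load atomic i128*"
    #pragma omp atomic read
      *cdv = cdx;  // CHECK'd above as "@__atomic_load(i64 16,"
    }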