Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -308,6 +308,11 @@
   return visitAllocSite(AI);
 }
 
+// Are we allowed to form an atomic load or store of this type?
+static bool isSupportedAtomicType(Type *Ty) {
+  return Ty->isIntegerTy() || Ty->isPointerTy() || Ty->isFloatingPointTy();
+}
+
 /// \brief Helper to combine a load to a new type.
 ///
 /// This just does the work of combining a load to a new type. It handles
@@ -319,6 +324,9 @@
 /// point the \c InstCombiner currently is using.
 static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI,
                                       Type *NewTy, const Twine &Suffix = "") {
+  assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
+         "can't fold an atomic load to requested type");
+
   Value *Ptr = LI.getPointerOperand();
   unsigned AS = LI.getPointerAddressSpace();
   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
@@ -400,6 +408,9 @@
 ///
 /// Returns the newly created store instruction.
 static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
+  assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
+         "can't fold an atomic store of requested type");
+
   Value *Ptr = SI.getPointerOperand();
   unsigned AS = SI.getPointerAddressSpace();
   SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
@@ -514,14 +525,14 @@
   // as long as those are noops (i.e., the source or dest type have the same
   // bitwidth as the target's pointers).
   if (LI.hasOneUse())
-    if (auto* CI = dyn_cast<CastInst>(LI.user_back())) {
-      if (CI->isNoopCast(DL)) {
-        LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
-        CI->replaceAllUsesWith(NewLoad);
-        IC.eraseInstFromFunction(*CI);
-        return &LI;
-      }
-    }
+    if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
+      if (CI->isNoopCast(DL))
+        if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
+          LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
+          CI->replaceAllUsesWith(NewLoad);
+          IC.eraseInstFromFunction(*CI);
+          return &LI;
+        }
 
   // FIXME: We should also canonicalize loads of vectors when their elements are
   // cast to other types.
@@ -1026,14 +1037,17 @@
   // Fold away bit casts of the stored value by storing the original type.
   if (auto *BC = dyn_cast<BitCastInst>(V)) {
     V = BC->getOperand(0);
-    combineStoreToNewValue(IC, SI, V);
-    return true;
+    if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
+      combineStoreToNewValue(IC, SI, V);
+      return true;
+    }
   }
 
-  if (Value *U = likeBitCastFromVector(IC, V)) {
-    combineStoreToNewValue(IC, SI, U);
-    return true;
-  }
+  if (Value *U = likeBitCastFromVector(IC, V))
+    if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
+      combineStoreToNewValue(IC, SI, U);
+      return true;
+    }
 
   // FIXME: We should also canonicalize stores of vectors when their elements
   // are cast to other types.
Index: llvm/trunk/test/Transforms/InstCombine/atomic.ll
===================================================================
--- llvm/trunk/test/Transforms/InstCombine/atomic.ll
+++ llvm/trunk/test/Transforms/InstCombine/atomic.ll
@@ -267,3 +267,23 @@
   store atomic i8* %l, i8** %p2 seq_cst, align 8
   ret void
 }
+
+;; At the moment, we can't form atomic vectors by folding since these are
+;; not representable in the IR. This was PR29121. The right long term
+;; solution is to extend the IR to handle this case.
+define <2 x float> @no_atomic_vector_load(i64* %p) {
+; CHECK-LABEL: @no_atomic_vector_load
+; CHECK: load atomic i64, i64* %p unordered, align 8
+  %load = load atomic i64, i64* %p unordered, align 8
+  %.cast = bitcast i64 %load to <2 x float>
+  ret <2 x float> %.cast
+}
+
+define void @no_atomic_vector_store(<2 x float> %p, i8* %p2) {
+; CHECK-LABEL: @no_atomic_vector_store
+; CHECK: store atomic i64 %1, i64* %2 unordered, align 8
+  %1 = bitcast <2 x float> %p to i64
+  %2 = bitcast i8* %p2 to i64*
+  store atomic i64 %1, i64* %2 unordered, align 8
+  ret void
+}
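
The new isSupportedAtomicType() guard only rejects destination types that are not integer, pointer, or floating-point, so the scalar noop-cast folds should still fire for atomic accesses. As a rough illustration (not part of this commit; the function name and the exact CHECK line are assumptions about InstCombine's output rather than verified test content), an extra case for atomic.ll in the same style would look like:

; Sketch only: assumes the noop-cast fold still rewrites an atomic i32 load
; whose sole use is a bitcast to float, since float is a supported atomic type.
define float @atomic_scalar_load_still_folds(i32* %p) {
; CHECK-LABEL: @atomic_scalar_load_still_folds
; CHECK: load atomic float, float* %{{.*}} unordered, align 4
  %load = load atomic i32, i32* %p unordered, align 4
  %cast = bitcast i32 %load to float
  ret float %cast
}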