Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -327,6 +327,7 @@
   LoadInst *NewLoad = IC.Builder->CreateAlignedLoad(
       IC.Builder->CreateBitCast(Ptr, NewTy->getPointerTo(AS)),
       LI.getAlignment(), LI.getName() + Suffix);
+  NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
   MDBuilder MDB(NewLoad->getContext());
   for (const auto &MDPair : MD) {
     unsigned ID = MDPair.first;
@@ -399,6 +400,7 @@
   StoreInst *NewStore = IC.Builder->CreateAlignedStore(
       V, IC.Builder->CreateBitCast(Ptr, V->getType()->getPointerTo(AS)),
       SI.getAlignment());
+  NewStore->setAtomic(SI.getOrdering(), SI.getSynchScope());
   for (const auto &MDPair : MD) {
     unsigned ID = MDPair.first;
     MDNode *N = MDPair.second;
@@ -456,9 +458,9 @@
 /// later. However, it is risky in case some backend or other part of LLVM is
 /// relying on the exact type loaded to select appropriate atomic operations.
 static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
-  // FIXME: We could probably with some care handle both volatile and atomic
-  // loads here but it isn't clear that this is important.
-  if (!LI.isSimple())
+  // FIXME: We could probably with some care handle both volatile and ordered
+  // atomic loads here but it isn't clear that this is important.
+  if (!LI.isUnordered())
     return nullptr;
 
   if (LI.use_empty())
@@ -934,9 +936,9 @@
 /// the store instruction as otherwise there is no way to signal whether it was
 /// combined or not: IC.EraseInstFromFunction returns a null pointer.
 static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
-  // FIXME: We could probably with some care handle both volatile and atomic
-  // stores here but it isn't clear that this is important.
-  if (!SI.isSimple())
+  // FIXME: We could probably with some care handle both volatile and ordered
+  // atomic stores here but it isn't clear that this is important.
+  if (!SI.isUnordered())
     return false;
 
   Value *V = SI.getValueOperand();
Index: test/Transforms/InstCombine/atomic.ll
===================================================================
--- test/Transforms/InstCombine/atomic.ll
+++ test/Transforms/InstCombine/atomic.ll
@@ -172,3 +172,42 @@
   %x = load atomic i32, i32* %addr seq_cst, align 4
   ret i32 %x
 }
+
+declare void @clobber()
+
+define i32 @test18(float* %p) {
+; CHECK-LABEL: define i32 @test18(
+; CHECK: load atomic i32, i32* [[A:%.*]] unordered, align 4
+; CHECK: store atomic i32 [[B:%.*]], i32* [[C:%.*]] unordered, align 4
+  %x = load atomic float, float* %p unordered, align 4
+  call void @clobber() ;; keep the load around
+  store atomic float %x, float* %p unordered, align 4
+  ret i32 0
+}
+
+; TODO: probably also legal in this case
+define i32 @test19(float* %p) {
+; CHECK-LABEL: define i32 @test19(
+; CHECK: load atomic float, float* %p seq_cst, align 4
+; CHECK: store atomic float %x, float* %p seq_cst, align 4
+  %x = load atomic float, float* %p seq_cst, align 4
+  call void @clobber() ;; keep the load around
+  store atomic float %x, float* %p seq_cst, align 4
+  ret i32 0
+}
+
+define i32 @test20(i32** %p, i8* %v) {
+; CHECK-LABEL: define i32 @test20(
+; CHECK: store atomic i8* %v, i8** [[D:%.*]] unordered, align 4
+  %cast = bitcast i8* %v to i32*
+  store atomic i32* %cast, i32** %p unordered, align 4
+  ret i32 0
+}
+
+define i32 @test21(i32** %p, i8* %v) {
+; CHECK-LABEL: define i32 @test21(
+; CHECK: store atomic i32* %cast, i32** %p monotonic, align 4
+  %cast = bitcast i8* %v to i32*
+  store atomic i32* %cast, i32** %p monotonic, align 4
+  ret i32 0
+}