Index: lib/Analysis/MemoryDependenceAnalysis.cpp
===================================================================
--- lib/Analysis/MemoryDependenceAnalysis.cpp
+++ lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -617,7 +617,16 @@
     }
 
     if (isInvariantLoad)
-       continue;
+      continue;
+
+    // A release fence requires that all stores complete before it, but does
+    // not prevent the reordering of following loads or stores 'before' the
+    // fence.  As a result, we look past it when finding a dependency for
+    // loads.  DSE uses this to find preceding stores and thus we can't bypass
+    // the fence if the query inst is a store.
+    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+      if (isLoad && FI->getOrdering() == Release)
+        continue;
 
     // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer.
     AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc);
Index: test/Transforms/DeadStoreElimination/fence.ll
===================================================================
--- /dev/null
+++ test/Transforms/DeadStoreElimination/fence.ll
@@ -0,0 +1,28 @@
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+; We can NOT dead store eliminate across the fence
+define void @test1(i32* %addr.i) {
+; CHECK-LABEL: @test1
+; CHECK: store
+; CHECK: fence
+; CHECK: store
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence release
+  store i32 5, i32* %addr.i, align 4
+  ret void
+}
+
+; We *could* DSE across this fence, but don't.  No other thread can
+; observe the order of the acquire fence and the store.
+define void @test2(i32* %addr.i) {
+; CHECK-LABEL: @test2
+; CHECK: store
+; CHECK: fence
+; CHECK: store
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence acquire
+  store i32 5, i32* %addr.i, align 4
+  ret void
+}
Index: test/Transforms/GVN/fence.ll
===================================================================
--- /dev/null
+++ test/Transforms/GVN/fence.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+
+; We can value forward across the fence since we can (semantically)
+; reorder the following load before the fence.
+define i32 @test(i32* %addr.i) {
+; CHECK-LABEL: @test
+; CHECK: store
+; CHECK: fence
+; CHECK-NOT: load
+; CHECK: ret
+  store i32 5, i32* %addr.i, align 4
+  fence release
+  %a = load i32, i32* %addr.i, align 4
+  ret i32 %a
+}
+
+; Same as above
+define i32 @test2(i32* %addr.i) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: fence
+; CHECK-NOT: load
+; CHECK: ret
+  %a = load i32, i32* %addr.i, align 4
+  fence release
+  %a2 = load i32, i32* %addr.i, align 4
+  %res = sub i32 %a, %a2
+  ret i32 %res
+}
+
+; We can not value forward across an acquire barrier since we might
+; be synchronizing with another thread storing to the same variable
+; followed by a release fence.  If this thread observed the release
+; had happened, we must present a consistent view of memory at the fence.
+define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) {
+; CHECK-LABEL: @test3
+; CHECK: load
+; CHECK: fence
+; CHECK: load
+; CHECK: ret
+  %a = load i32, i32* %addr.i, align 4
+  fence acquire
+  %a2 = load i32, i32* %addr.i, align 4
+  %res = sub i32 %a, %a2
+  ret i32 %res
+}