Index: llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
===================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
+++ llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
@@ -3673,11 +3673,19 @@
 class PTXReadSRegIntrinsic_r32<string name>
   : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
     GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
-
 class PTXReadSRegIntrinsic_r64<string name>
   : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
     GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
 
+// Intrinsics to read registers with non-constant values. E.g. the values that
+// do change over the kernel lifetime. Such reads should not be CSE'd.
+class PTXReadNCSRegIntrinsic_r32<string name>
+  : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>,
+    GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+class PTXReadNCSRegIntrinsic_r64<string name>
+  : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>,
+    GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+
 defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
 defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
 
@@ -3703,13 +3711,13 @@
 def int_nvvm_read_ptx_sreg_lanemask_gt :
     PTXReadSRegIntrinsic_r32<"lanemask_gt">;
 
-def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
-def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
+def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
+def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
 
-def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
-def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
-def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
-def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
+def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
+def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
+def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
+def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
 
 def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
 
Index: llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
+++ llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
@@ -94,6 +94,43 @@
   ret i32 %zext
 }
 
+; Most of nvvm.read.ptx.sreg.* intrinsics always return the same value and may
+; be CSE'd.
+; CHECK-LABEL: test_tid
+define i32 @test_tid() {
+; CHECK: mov.u32         %r{{.*}}, %tid.x;
+  %a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; CHECK-NOT: mov.u32         %r{{.*}}, %tid.x;
+  %b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %ret = add i32 %a, %b
+; CHECK: ret
+  ret i32 %ret
+}
+
+; reading clock() or clock64() should not be CSE'd as each read may return
+; different value.
+; CHECK-LABEL: test_clock
+define i32 @test_clock() {
+; CHECK: mov.u32         %r{{.*}}, %clock;
+  %a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
+; CHECK: mov.u32         %r{{.*}}, %clock;
+  %b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
+  %ret = add i32 %a, %b
+; CHECK: ret
+  ret i32 %ret
+}
+
+; CHECK-LABEL: test_clock64
+define i64 @test_clock64() {
+; CHECK: mov.u64         %r{{.*}}, %clock64;
+  %a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
+; CHECK: mov.u64         %r{{.*}}, %clock64;
+  %b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
+  %ret = add i64 %a, %b
+; CHECK: ret
+  ret i64 %ret
+}
+
 declare float @llvm.fabs.f32(float)
 declare double @llvm.fabs.f64(double)
 declare float @llvm.nvvm.sqrt.f(float)
@@ -103,3 +140,7 @@
 declare i16 @llvm.ctpop.i16(i16)
 declare i32 @llvm.ctpop.i32(i32)
 declare i64 @llvm.ctpop.i64(i64)
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.clock()
+declare i64 @llvm.nvvm.read.ptx.sreg.clock64()