Index: llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
===================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
+++ llvm/trunk/include/llvm/IR/IntrinsicsNVVM.td
@@ -3673,11 +3673,19 @@
 class PTXReadSRegIntrinsic_r32<string name>
   : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
     GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
-
 class PTXReadSRegIntrinsic_r64<string name>
   : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
     GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
 
+// Intrinsics to read registers whose values change over the kernel lifetime.
+// Marked IntrInaccessibleMemOnly rather than IntrNoMem so reads are not CSE'd.
+class PTXReadNCSRegIntrinsic_r32<string name>
+  : Intrinsic<[llvm_i32_ty], [], [IntrInaccessibleMemOnly]>,
+    GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+class PTXReadNCSRegIntrinsic_r64<string name>
+  : Intrinsic<[llvm_i64_ty], [], [IntrInaccessibleMemOnly]>,
+    GCCBuiltin<"__nvvm_read_ptx_sreg_" # name>;
+
 defm int_nvvm_read_ptx_sreg_tid : PTXReadSRegIntrinsic_v4i32<"tid">;
 defm int_nvvm_read_ptx_sreg_ntid : PTXReadSRegIntrinsic_v4i32<"ntid">;
 
@@ -3703,13 +3711,13 @@
 def int_nvvm_read_ptx_sreg_lanemask_gt :
     PTXReadSRegIntrinsic_r32<"lanemask_gt">;
 
-def int_nvvm_read_ptx_sreg_clock : PTXReadSRegIntrinsic_r32<"clock">;
-def int_nvvm_read_ptx_sreg_clock64 : PTXReadSRegIntrinsic_r64<"clock64">;
+def int_nvvm_read_ptx_sreg_clock : PTXReadNCSRegIntrinsic_r32<"clock">;
+def int_nvvm_read_ptx_sreg_clock64 : PTXReadNCSRegIntrinsic_r64<"clock64">;
 
-def int_nvvm_read_ptx_sreg_pm0 : PTXReadSRegIntrinsic_r32<"pm0">;
-def int_nvvm_read_ptx_sreg_pm1 : PTXReadSRegIntrinsic_r32<"pm1">;
-def int_nvvm_read_ptx_sreg_pm2 : PTXReadSRegIntrinsic_r32<"pm2">;
-def int_nvvm_read_ptx_sreg_pm3 : PTXReadSRegIntrinsic_r32<"pm3">;
+def int_nvvm_read_ptx_sreg_pm0 : PTXReadNCSRegIntrinsic_r32<"pm0">;
+def int_nvvm_read_ptx_sreg_pm1 : PTXReadNCSRegIntrinsic_r32<"pm1">;
+def int_nvvm_read_ptx_sreg_pm2 : PTXReadNCSRegIntrinsic_r32<"pm2">;
+def int_nvvm_read_ptx_sreg_pm3 : PTXReadNCSRegIntrinsic_r32<"pm3">;
 
 def int_nvvm_read_ptx_sreg_warpsize : PTXReadSRegIntrinsic_r32<"warpsize">;
 
Index: llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
+++ llvm/trunk/test/CodeGen/NVPTX/intrinsics.ll
@@ -94,6 +94,43 @@
   ret i32 %zext
 }
 
+; Most nvvm.read.ptx.sreg.* intrinsics always return the same value and may
+; be CSE'd.
+; CHECK-LABEL: test_tid
+define i32 @test_tid() {
+; CHECK: mov.u32 %r{{.*}}, %tid.x;
+  %a = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+; CHECK-NOT: mov.u32 %r{{.*}}, %tid.x;
+  %b = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+  %ret = add i32 %a, %b
+; CHECK: ret
+  ret i32 %ret
+}
+
+; Reads of clock() or clock64() must not be CSE'd, as each read may return a
+; different value.
+; CHECK-LABEL: test_clock
+define i32 @test_clock() {
+; CHECK: mov.u32 %r{{.*}}, %clock;
+  %a = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
+; CHECK: mov.u32 %r{{.*}}, %clock;
+  %b = tail call i32 @llvm.nvvm.read.ptx.sreg.clock()
+  %ret = add i32 %a, %b
+; CHECK: ret
+  ret i32 %ret
+}
+
+; CHECK-LABEL: test_clock64
+define i64 @test_clock64() {
+; CHECK: mov.u64 %r{{.*}}, %clock64;
+  %a = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
+; CHECK: mov.u64 %r{{.*}}, %clock64;
+  %b = tail call i64 @llvm.nvvm.read.ptx.sreg.clock64()
+  %ret = add i64 %a, %b
+; CHECK: ret
+  ret i64 %ret
+}
+
 declare float @llvm.fabs.f32(float)
 declare double @llvm.fabs.f64(double)
 declare float @llvm.nvvm.sqrt.f(float)
@@ -103,3 +140,7 @@
 declare i16 @llvm.ctpop.i16(i16)
 declare i32 @llvm.ctpop.i32(i32)
 declare i64 @llvm.ctpop.i64(i64)
+
+declare i32 @llvm.nvvm.read.ptx.sreg.tid.x()
+declare i32 @llvm.nvvm.read.ptx.sreg.clock()
+declare i64 @llvm.nvvm.read.ptx.sreg.clock64()