diff --git a/llvm/test/CodeGen/NVPTX/b52037.ll b/llvm/test/CodeGen/NVPTX/b52037.ll --- a/llvm/test/CodeGen/NVPTX/b52037.ll +++ b/llvm/test/CodeGen/NVPTX/b52037.ll @@ -23,14 +23,14 @@ %struct.foo = type <{ i16*, %float4, %int3, i32, %float3, [4 x i8], i64, i32, i8, [3 x i8], i32 }> @global = external local_unnamed_addr addrspace(4) externally_initialized global [27 x %char3], align 1 -@global.1 = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* inttoptr (i64 16 to i8*), i8* null, i8* null] }, align 8 +@global_1 = linkonce_odr unnamed_addr constant { [3 x i8*] } { [3 x i8*] [i8* inttoptr (i64 16 to i8*), i8* null, i8* null] }, align 8 ; Function Attrs: argmemonly mustprogress nofree nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0 declare %float4 @snork(float) local_unnamed_addr -declare %float3 @bar.2(float, float) local_unnamed_addr +declare %float3 @bar_2(float, float) local_unnamed_addr declare %float3 @zot() local_unnamed_addr @@ -49,7 +49,7 @@ %0 = bitcast %float4* %tmp9 to i16** store i16* %tmp5, i16** %0, align 8 %tmp10 = getelementptr inbounds %struct.zot, %struct.zot* %tmp, i64 0, i32 0, i32 0 - store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @global.1, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %tmp10, align 16 + store i32 (...)** bitcast (i8** getelementptr inbounds ({ [3 x i8*] }, { [3 x i8*] }* @global_1, i64 0, inrange i32 0, i64 3) to i32 (...)**), i32 (...)*** %tmp10, align 16 %tmp34 = getelementptr %struct.spam.2, %struct.spam.2* %arg, i64 0, i32 0, i32 0 %tmp.i1 = tail call i64 @foo() %tmp44.i16 = getelementptr inbounds i16, i16* %tmp5, i64 undef @@ -100,7 +100,7 @@ %tmp22.i.i.peel = fsub contract float %tmp19.i.i, %tmp15.i.i.peel %tmp17.i.i.peel = extractvalue %float3 %tmp13.i.i.peel, 2 %tmp27.i.i.peel = fsub contract float %tmp24.i.i, %tmp17.i.i.peel - %tmp28.i.i.peel = tail call %float3 @bar.2(float %tmp22.i.i.peel, float %tmp27.i.i.peel) #1 + %tmp28.i.i.peel = tail call %float3 @bar_2(float %tmp22.i.i.peel, float %tmp27.i.i.peel) #1 %tmp28.i.elt.i.peel = extractvalue %float3 %tmp28.i.i.peel, 0 store float %tmp28.i.elt.i.peel, float* %tmp59.i, align 16 %tmp28.i.elt2.i.peel = extractvalue %float3 %tmp28.i.i.peel, 1 @@ -171,7 +171,7 @@ %tmp22.i.i = fsub contract float %tmp19.i.i, %tmp15.i.i %tmp17.i.i = extractvalue %float3 %tmp13.i.i, 2 %tmp27.i.i = fsub contract float %tmp24.i.i, %tmp17.i.i - %tmp28.i.i = tail call %float3 @bar.2(float %tmp22.i.i, float %tmp27.i.i) #1 + %tmp28.i.i = tail call %float3 @bar_2(float %tmp22.i.i, float %tmp27.i.i) #1 %tmp28.i.elt.i = extractvalue %float3 %tmp28.i.i, 0 store float %tmp28.i.elt.i, float* %tmp59.i, align 16 %tmp28.i.elt2.i = extractvalue %float3 %tmp28.i.i, 1 diff --git a/llvm/test/CodeGen/NVPTX/barrier.ll b/llvm/test/CodeGen/NVPTX/barrier.ll --- a/llvm/test/CodeGen/NVPTX/barrier.ll +++ b/llvm/test/CodeGen/NVPTX/barrier.ll @@ -4,10 +4,10 @@ declare void @llvm.nvvm.barrier.sync(i32) declare void @llvm.nvvm.barrier.sync.cnt(i32, i32) -; CHECK-LABEL: .func{{.*}}barrier.sync -define void @barrier.sync(i32 %id, i32 %cnt) { - ; CHECK: ld.param.u32 [[ID:%r[0-9]+]], [barrier.sync_param_0]; - ; CHECK: ld.param.u32 [[CNT:%r[0-9]+]], [barrier.sync_param_1]; +; CHECK-LABEL: .func{{.*}}barrier_sync +define void @barrier_sync(i32 %id, i32 %cnt) { + ; CHECK: ld.param.u32 [[ID:%r[0-9]+]], [barrier_sync_param_0]; + ; CHECK: ld.param.u32 [[CNT:%r[0-9]+]], [barrier_sync_param_1]; ; CHECK: barrier.sync [[ID]], [[CNT]]; call void @llvm.nvvm.barrier.sync.cnt(i32 %id, i32 %cnt) diff --git a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py --- a/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py +++ b/llvm/test/CodeGen/NVPTX/ld-st-addrrspace.py @@ -44,7 +44,8 @@ def gen_load_tests(): load_template = """ -define ${type} @ld${_volatile}${_space}.${ptx_type}(${type} addrspace(${asid})* %ptr) { +define ${type} @${testname}(${type} addrspace(${asid})* %ptr) { +; CHECK: ${testname} ; CHECK_P32: ld${_volatile}${_volatile_as}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%r{{[0-9]+}}] ; CHECK_P64: ld${_volatile}${_volatile_as}.${ptx_type} %${ptx_reg}{{[0-9]+}}, [%rd{{[0-9]+}}] ; CHECK: ret @@ -80,6 +81,10 @@ "asid": addrspace_id[space], } + testname = \ + Template("ld_${_volatile}${_space}.${ptx_type}").substitute(params) + params["testname"] = testname.replace(".", "_") + # LLVM does not accept "addrspacecast Type* addrspace(0) to Type*", so we # need to avoid it for generic pointer tests. if space: diff --git a/llvm/test/CodeGen/NVPTX/match.ll b/llvm/test/CodeGen/NVPTX/match.ll --- a/llvm/test/CodeGen/NVPTX/match.ll +++ b/llvm/test/CodeGen/NVPTX/match.ll @@ -3,10 +3,10 @@ declare i32 @llvm.nvvm.match.any.sync.i32(i32, i32) declare i32 @llvm.nvvm.match.any.sync.i64(i32, i64) -; CHECK-LABEL: .func{{.*}}match.any.sync.i32 -define i32 @match.any.sync.i32(i32 %mask, i32 %value) { - ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [match.any.sync.i32_param_0]; - ; CHECK: ld.param.u32 [[VALUE:%r[0-9]+]], [match.any.sync.i32_param_1]; +; CHECK-LABEL: .func{{.*}}match_any_sync_i32 +define i32 @match_any_sync_i32(i32 %mask, i32 %value) { + ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [match_any_sync_i32_param_0]; + ; CHECK: ld.param.u32 [[VALUE:%r[0-9]+]], [match_any_sync_i32_param_1]; ; CHECK: match.any.sync.b32 [[V0:%r[0-9]+]], [[VALUE]], [[MASK]]; %v0 = call i32 @llvm.nvvm.match.any.sync.i32(i32 %mask, i32 %value) @@ -22,10 +22,10 @@ ret i32 %sum3; } -; CHECK-LABEL: .func{{.*}}match.any.sync.i64 -define i32 @match.any.sync.i64(i32 %mask, i64 %value) { - ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [match.any.sync.i64_param_0]; - ; CHECK: ld.param.u64 [[VALUE:%rd[0-9]+]], [match.any.sync.i64_param_1]; +; CHECK-LABEL: .func{{.*}}match_any_sync_i64 +define i32 @match_any_sync_i64(i32 %mask, i64 %value) { + ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [match_any_sync_i64_param_0]; + ; CHECK: ld.param.u64 [[VALUE:%rd[0-9]+]], [match_any_sync_i64_param_1]; ; CHECK: match.any.sync.b64 [[V0:%r[0-9]+]], [[VALUE]], [[MASK]]; %v0 = call i32 @llvm.nvvm.match.any.sync.i64(i32 %mask, i64 %value) @@ -44,10 +44,10 @@ declare {i32, i1} @llvm.nvvm.match.all.sync.i32p(i32, i32) declare {i32, i1} @llvm.nvvm.match.all.sync.i64p(i32, i64) -; CHECK-LABEL: .func{{.*}}match.all.sync.i32p( -define {i32,i1} @match.all.sync.i32p(i32 %mask, i32 %value) { - ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [match.all.sync.i32p_param_0]; - ; CHECK: ld.param.u32 [[VALUE:%r[0-9]+]], [match.all.sync.i32p_param_1]; +; CHECK-LABEL: .func{{.*}}match_all_sync_i32p( +define {i32,i1} @match_all_sync_i32p(i32 %mask, i32 %value) { + ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [match_all_sync_i32p_param_0]; + ; CHECK: ld.param.u32 [[VALUE:%r[0-9]+]], [match_all_sync_i32p_param_1]; ; CHECK: match.all.sync.b32 {{%r[0-9]+\|%p[0-9]+}}, [[VALUE]], [[MASK]]; %r1 = call {i32, i1} @llvm.nvvm.match.all.sync.i32p(i32 %mask, i32 %value) @@ -80,10 +80,10 @@ ret {i32, i1} %ret1; } -; CHECK-LABEL: .func{{.*}}match.all.sync.i64p( -define {i32,i1} @match.all.sync.i64p(i32 %mask, i64 %value) { - ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [match.all.sync.i64p_param_0]; - ; CHECK: ld.param.u64 [[VALUE:%rd[0-9]+]], [match.all.sync.i64p_param_1]; +; CHECK-LABEL: .func{{.*}}match_all_sync_i64p( +define {i32,i1} @match_all_sync_i64p(i32 %mask, i64 %value) { + ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]], [match_all_sync_i64p_param_0]; + ; CHECK: ld.param.u64 [[VALUE:%rd[0-9]+]], [match_all_sync_i64p_param_1]; ; CHECK: match.all.sync.b64 {{%r[0-9]+\|%p[0-9]+}}, [[VALUE]], [[MASK]]; %r1 = call {i32, i1} @llvm.nvvm.match.all.sync.i64p(i32 %mask, i64 %value) diff --git a/llvm/test/CodeGen/NVPTX/shfl-p.ll b/llvm/test/CodeGen/NVPTX/shfl-p.ll --- a/llvm/test/CodeGen/NVPTX/shfl-p.ll +++ b/llvm/test/CodeGen/NVPTX/shfl-p.ll @@ -9,8 +9,8 @@ declare {i32, i1} @llvm.nvvm.shfl.idx.i32p(i32, i32, i32) declare {float, i1} @llvm.nvvm.shfl.idx.f32p(float, i32, i32) -; CHECK-LABEL: .func{{.*}}shfl.i32.rrr -define {i32, i1} @shfl.i32.rrr(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_i32_rrr +define {i32, i1} @shfl_i32_rrr(i32 %a, i32 %b, i32 %c) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] @@ -20,8 +20,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.i32.irr -define {i32, i1} @shfl.i32.irr(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_i32_irr +define {i32, i1} @shfl_i32_irr(i32 %a, i32 %b, i32 %c) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] @@ -31,8 +31,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.i32.rri -define {i32, i1} @shfl.i32.rri(i32 %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_i32_rri +define {i32, i1} @shfl_i32_rri(i32 %a, i32 %b) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 1; @@ -41,8 +41,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.i32.iri -define {i32, i1} @shfl.i32.iri(i32 %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_i32_iri +define {i32, i1} @shfl_i32_iri(i32 %a, i32 %b) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 2; @@ -51,8 +51,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.i32.rir -define {i32, i1} @shfl.i32.rir(i32 %a, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_i32_rir +define {i32, i1} @shfl_i32_rir(i32 %a, i32 %c) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, [[C]]; @@ -61,8 +61,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.i32.iir -define {i32, i1} @shfl.i32.iir(i32 %a, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_i32_iir +define {i32, i1} @shfl_i32_iir(i32 %a, i32 %c) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, [[C]]; @@ -71,8 +71,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.i32.rii -define {i32, i1} @shfl.i32.rii(i32 %a) { +; CHECK-LABEL: .func{{.*}}shfl_i32_rii +define {i32, i1} @shfl_i32_rii(i32 %a) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, 2; ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]] @@ -80,8 +80,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.i32.iii -define {i32, i1} @shfl.i32.iii(i32 %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_i32_iii +define {i32, i1} @shfl_i32_iii(i32 %a, i32 %b) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, 3; ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]] @@ -91,8 +91,8 @@ ;; Same intrinsics, but for float -; CHECK-LABEL: .func{{.*}}shfl.f32.rrr -define {float, i1} @shfl.f32.rrr(float %a, i32 %b, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_f32_rrr +define {float, i1} @shfl_f32_rrr(float %a, i32 %b, i32 %c) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] @@ -102,8 +102,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.f32.irr -define {float, i1} @shfl.f32.irr(float %a, i32 %b, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_f32_irr +define {float, i1} @shfl_f32_irr(float %a, i32 %b, i32 %c) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] @@ -113,8 +113,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.f32.rri -define {float, i1} @shfl.f32.rri(float %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_f32_rri +define {float, i1} @shfl_f32_rri(float %a, i32 %b) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 1; @@ -123,8 +123,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.f32.iri -define {float, i1} @shfl.f32.iri(float %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_f32_iri +define {float, i1} @shfl_f32_iri(float %a, i32 %b) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 2; @@ -133,8 +133,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.f32.rir -define {float, i1} @shfl.f32.rir(float %a, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_f32_rir +define {float, i1} @shfl_f32_rir(float %a, i32 %c) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, [[C]]; @@ -143,8 +143,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.f32.iir -define {float, i1} @shfl.f32.iir(float %a, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_f32_iir +define {float, i1} @shfl_f32_iir(float %a, i32 %c) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, [[C]]; @@ -153,8 +153,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.f32.rii -define {float, i1} @shfl.f32.rii(float %a) { +; CHECK-LABEL: .func{{.*}}shfl_f32_rii +define {float, i1} @shfl_f32_rii(float %a) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, 2; ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]] @@ -162,8 +162,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.f32.iii -define {float, i1} @shfl.f32.iii(float %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_f32_iii +define {float, i1} @shfl_f32_iii(float %a, i32 %b) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, 3; ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]] diff --git a/llvm/test/CodeGen/NVPTX/shfl-sync-p.ll b/llvm/test/CodeGen/NVPTX/shfl-sync-p.ll --- a/llvm/test/CodeGen/NVPTX/shfl-sync-p.ll +++ b/llvm/test/CodeGen/NVPTX/shfl-sync-p.ll @@ -9,8 +9,8 @@ declare {i32, i1} @llvm.nvvm.shfl.sync.idx.i32p(i32, i32, i32, i32) declare {float, i1} @llvm.nvvm.shfl.sync.idx.f32p(i32, float, i32, i32) -; CHECK-LABEL: .func{{.*}}shfl.sync.i32.rrr -define {i32, i1} @shfl.sync.i32.rrr(i32 %mask, i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_i32_rrr +define {i32, i1} @shfl_sync_i32_rrr(i32 %mask, i32 %a, i32 %b, i32 %c) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] @@ -21,8 +21,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.i32.irr -define {i32, i1} @shfl.sync.i32.irr(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_i32_irr +define {i32, i1} @shfl_sync_i32_irr(i32 %a, i32 %b, i32 %c) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] @@ -32,8 +32,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.i32.rri -define {i32, i1} @shfl.sync.i32.rri(i32 %mask, i32 %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_sync_i32_rri +define {i32, i1} @shfl_sync_i32_rri(i32 %mask, i32 %a, i32 %b) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] @@ -43,8 +43,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.i32.iri -define {i32, i1} @shfl.sync.i32.iri(i32 %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_sync_i32_iri +define {i32, i1} @shfl_sync_i32_iri(i32 %a, i32 %b) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 2, 1; @@ -53,8 +53,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.i32.rir -define {i32, i1} @shfl.sync.i32.rir(i32 %mask, i32 %a, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_i32_rir +define {i32, i1} @shfl_sync_i32_rir(i32 %mask, i32 %a, i32 %c) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] @@ -64,8 +64,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.i32.iir -define {i32, i1} @shfl.sync.i32.iir(i32 %a, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_i32_iir +define {i32, i1} @shfl_sync_i32_iir(i32 %a, i32 %c) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, [[C]], 1; @@ -74,8 +74,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.i32.rii -define {i32, i1} @shfl.sync.i32.rii(i32 %mask, i32 %a) { +; CHECK-LABEL: .func{{.*}}shfl_sync_i32_rii +define {i32, i1} @shfl_sync_i32_rii(i32 %mask, i32 %a) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, 2, [[MASK]]; @@ -84,8 +84,8 @@ ret {i32, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.i32.iii -define {i32, i1} @shfl.sync.i32.iii(i32 %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_sync_i32_iii +define {i32, i1} @shfl_sync_i32_iii(i32 %a, i32 %b) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, 3, 1; ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]] @@ -95,8 +95,8 @@ ;; Same intrinsics, but for float -; CHECK-LABEL: .func{{.*}}shfl.sync.f32.rrr -define {float, i1} @shfl.sync.f32.rrr(i32 %mask, float %a, i32 %b, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_f32_rrr +define {float, i1} @shfl_sync_f32_rrr(i32 %mask, float %a, i32 %b, i32 %c) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] @@ -107,8 +107,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.f32.irr -define {float, i1} @shfl.sync.f32.irr(float %a, i32 %b, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_f32_irr +define {float, i1} @shfl_sync_f32_irr(float %a, i32 %b, i32 %c) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] @@ -118,8 +118,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.f32.rri -define {float, i1} @shfl.sync.f32.rri(i32 %mask, float %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_sync_f32_rri +define {float, i1} @shfl_sync_f32_rri(i32 %mask, float %a, i32 %b) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] @@ -129,8 +129,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.f32.iri -define {float, i1} @shfl.sync.f32.iri(float %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_sync_f32_iri +define {float, i1} @shfl_sync_f32_iri(float %a, i32 %b) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], [[B]], 2, 1; @@ -139,8 +139,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.f32.rir -define {float, i1} @shfl.sync.f32.rir(i32 %mask, float %a, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_f32_rir +define {float, i1} @shfl_sync_f32_rir(i32 %mask, float %a, i32 %c) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] @@ -150,8 +150,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.f32.iir -define {float, i1} @shfl.sync.f32.iir(float %a, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_f32_iir +define {float, i1} @shfl_sync_f32_iir(float %a, i32 %c) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, [[C]], 1; @@ -160,8 +160,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.f32.rii -define {float, i1} @shfl.sync.f32.rii(i32 %mask, float %a) { +; CHECK-LABEL: .func{{.*}}shfl_sync_f32_rii +define {float, i1} @shfl_sync_f32_rii(i32 %mask, float %a) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 1, 2, [[MASK]]; @@ -170,8 +170,8 @@ ret {float, i1} %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.f32.iii -define {float, i1} @shfl.sync.f32.iii(float %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_sync_f32_iii +define {float, i1} @shfl_sync_f32_iii(float %a, i32 %b) { ; CHECK: ld.param.f32 [[A:%f[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%f[0-9]+]]|[[OUTP:%p[0-9]+]], [[A]], 2, 3, 1; ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]] diff --git a/llvm/test/CodeGen/NVPTX/shfl-sync.ll b/llvm/test/CodeGen/NVPTX/shfl-sync.ll --- a/llvm/test/CodeGen/NVPTX/shfl-sync.ll +++ b/llvm/test/CodeGen/NVPTX/shfl-sync.ll @@ -9,8 +9,8 @@ declare i32 @llvm.nvvm.shfl.sync.idx.i32(i32, i32, i32, i32) declare float @llvm.nvvm.shfl.sync.idx.f32(float, i32, i32, i32) -; CHECK-LABEL: .func{{.*}}shfl.sync.rrr -define i32 @shfl.sync.rrr(i32 %mask, i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_rrr +define i32 @shfl_sync_rrr(i32 %mask, i32 %a, i32 %b, i32 %c) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] @@ -21,8 +21,8 @@ ret i32 %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.irr -define i32 @shfl.sync.irr(i32 %a, i32 %b, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_irr +define i32 @shfl_sync_irr(i32 %a, i32 %b, i32 %c) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] @@ -32,8 +32,8 @@ ret i32 %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.rri -define i32 @shfl.sync.rri(i32 %mask, i32 %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_sync_rri +define i32 @shfl_sync_rri(i32 %mask, i32 %a, i32 %b) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] @@ -43,8 +43,8 @@ ret i32 %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.iri -define i32 @shfl.sync.iri(i32 %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_sync_iri +define i32 @shfl_sync_iri(i32 %a, i32 %b) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[B:%r[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], [[B]], 2, 1; @@ -53,8 +53,8 @@ ret i32 %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.rir -define i32 @shfl.sync.rir(i32 %mask, i32 %a, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_rir +define i32 @shfl_sync_rir(i32 %mask, i32 %a, i32 %c) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] @@ -64,8 +64,8 @@ ret i32 %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.iir -define i32 @shfl.sync.iir(i32 %a, i32 %c) { +; CHECK-LABEL: .func{{.*}}shfl_sync_iir +define i32 @shfl_sync_iir(i32 %a, i32 %c) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: ld.param.u32 [[C:%r[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 2, [[C]], 1; @@ -74,8 +74,8 @@ ret i32 %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.rii -define i32 @shfl.sync.rii(i32 %mask, i32 %a) { +; CHECK-LABEL: .func{{.*}}shfl_sync_rii +define i32 @shfl_sync_rii(i32 %mask, i32 %a) { ; CHECK: ld.param.u32 [[MASK:%r[0-9]+]] ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 1, 2, [[MASK]]; @@ -84,8 +84,8 @@ ret i32 %val } -; CHECK-LABEL: .func{{.*}}shfl.sync.iii -define i32 @shfl.sync.iii(i32 %a, i32 %b) { +; CHECK-LABEL: .func{{.*}}shfl_sync_iii +define i32 @shfl_sync_iii(i32 %a, i32 %b) { ; CHECK: ld.param.u32 [[A:%r[0-9]+]] ; CHECK: shfl.sync.down.b32 [[OUT:%r[0-9]+]], [[A]], 2, 3, 1; ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]] diff --git a/llvm/test/CodeGen/NVPTX/shfl.ll b/llvm/test/CodeGen/NVPTX/shfl.ll --- a/llvm/test/CodeGen/NVPTX/shfl.ll +++ b/llvm/test/CodeGen/NVPTX/shfl.ll @@ -12,8 +12,8 @@ ; Try all four permutations of register and immediate parameters with ; shfl.down. -; CHECK-LABEL: .func{{.*}}shfl.down1 -define i32 @shfl.down1(i32 %in) { +; CHECK-LABEL: .func{{.*}}shfl_down1 +define i32 @shfl_down1(i32 %in) { ; CHECK: ld.param.u32 [[IN:%r[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%r[0-9]+]], [[IN]], 1, 2; ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]] @@ -21,8 +21,8 @@ ret i32 %val } -; CHECK-LABEL: .func{{.*}}shfl.down2 -define i32 @shfl.down2(i32 %in, i32 %width) { +; CHECK-LABEL: .func{{.*}}shfl_down2 +define i32 @shfl_down2(i32 %in, i32 %width) { ; CHECK: ld.param.u32 [[IN1:%r[0-9]+]] ; CHECK: ld.param.u32 [[IN2:%r[0-9]+]] ; CHECK: shfl.down.{{.}}32 %r{{[0-9]+}}, [[IN1]], [[IN2]], 3; @@ -30,8 +30,8 @@ ret i32 %val } -; CHECK-LABEL: .func{{.*}}shfl.down3 -define i32 @shfl.down3(i32 %in, i32 %mask) { +; CHECK-LABEL: .func{{.*}}shfl_down3 +define i32 @shfl_down3(i32 %in, i32 %mask) { ; CHECK: ld.param.u32 [[IN1:%r[0-9]+]] ; CHECK: ld.param.u32 [[IN2:%r[0-9]+]] ; CHECK: shfl.down.{{.}}32 %r{{[0-9]+}}, [[IN1]], 4, [[IN2]]; @@ -39,8 +39,8 @@ ret i32 %val } -; CHECK-LABEL: .func{{.*}}shfl.down4 -define i32 @shfl.down4(i32 %in, i32 %width, i32 %mask) { +; CHECK-LABEL: .func{{.*}}shfl_down4 +define i32 @shfl_down4(i32 %in, i32 %width, i32 %mask) { ; CHECK: ld.param.u32 [[IN1:%r[0-9]+]] ; CHECK: ld.param.u32 [[IN2:%r[0-9]+]] ; CHECK: ld.param.u32 [[IN3:%r[0-9]+]] @@ -50,8 +50,8 @@ } ; Try shfl.down with floating-point params. -; CHECK-LABEL: .func{{.*}}shfl.down.float -define float @shfl.down.float(float %in) { +; CHECK-LABEL: .func{{.*}}shfl_down_float +define float @shfl_down_float(float %in) { ; CHECK: ld.param.f32 [[IN:%f[0-9]+]] ; CHECK: shfl.down.b32 [[OUT:%f[0-9]+]], [[IN]], 5, 6; ; CHECK: st.param.{{.}}32 {{.*}}, [[OUT]] @@ -61,7 +61,7 @@ ; Try the rest of the shfl modes. Hopefully they're declared in such a way ; that if shfl.down works correctly, they also work correctly. -define void @shfl.rest(i32 %in_i32, float %in_float, i32* %out_i32, float* %out_float) { +define void @shfl_rest(i32 %in_i32, float %in_float, i32* %out_i32, float* %out_float) { ; CHECK: shfl.up.b32 %r{{[0-9]+}}, %r{{[0-9]+}}, 1, 2; %up_i32 = call i32 @llvm.nvvm.shfl.up.i32(i32 %in_i32, i32 1, i32 2) store i32 %up_i32, i32* %out_i32 diff --git a/llvm/test/CodeGen/NVPTX/vote.ll b/llvm/test/CodeGen/NVPTX/vote.ll --- a/llvm/test/CodeGen/NVPTX/vote.ll +++ b/llvm/test/CodeGen/NVPTX/vote.ll @@ -1,64 +1,64 @@ ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 -mattr=+ptx60 | FileCheck %s declare i1 @llvm.nvvm.vote.all(i1) -; CHECK-LABEL: .func{{.*}}vote.all -define i1 @vote.all(i1 %pred) { +; CHECK-LABEL: .func{{.*}}vote_all +define i1 @vote_all(i1 %pred) { ; CHECK: vote.all.pred %val = call i1 @llvm.nvvm.vote.all(i1 %pred) ret i1 %val } declare i1 @llvm.nvvm.vote.any(i1) -; CHECK-LABEL: .func{{.*}}vote.any -define i1 @vote.any(i1 %pred) { +; CHECK-LABEL: .func{{.*}}vote_any +define i1 @vote_any(i1 %pred) { ; CHECK: vote.any.pred %val = call i1 @llvm.nvvm.vote.any(i1 %pred) ret i1 %val } declare i1 @llvm.nvvm.vote.uni(i1) -; CHECK-LABEL: .func{{.*}}vote.uni -define i1 @vote.uni(i1 %pred) { +; CHECK-LABEL: .func{{.*}}vote_uni +define i1 @vote_uni(i1 %pred) { ; CHECK: vote.uni.pred %val = call i1 @llvm.nvvm.vote.uni(i1 %pred) ret i1 %val } declare i32 @llvm.nvvm.vote.ballot(i1) -; CHECK-LABEL: .func{{.*}}vote.ballot -define i32 @vote.ballot(i1 %pred) { +; CHECK-LABEL: .func{{.*}}vote_ballot +define i32 @vote_ballot(i1 %pred) { ; CHECK: vote.ballot.b32 %val = call i32 @llvm.nvvm.vote.ballot(i1 %pred) ret i32 %val } declare i1 @llvm.nvvm.vote.all.sync(i32, i1) -; CHECK-LABEL: .func{{.*}}vote.sync.all -define i1 @vote.sync.all(i32 %mask, i1 %pred) { +; CHECK-LABEL: .func{{.*}}vote_sync_all +define i1 @vote_sync_all(i32 %mask, i1 %pred) { ; CHECK: vote.sync.all.pred %val = call i1 @llvm.nvvm.vote.all.sync(i32 %mask, i1 %pred) ret i1 %val } declare i1 @llvm.nvvm.vote.any.sync(i32, i1) -; CHECK-LABEL: .func{{.*}}vote.sync.any -define i1 @vote.sync.any(i32 %mask, i1 %pred) { +; CHECK-LABEL: .func{{.*}}vote_sync_any +define i1 @vote_sync_any(i32 %mask, i1 %pred) { ; CHECK: vote.sync.any.pred %val = call i1 @llvm.nvvm.vote.any.sync(i32 %mask, i1 %pred) ret i1 %val } declare i1 @llvm.nvvm.vote.uni.sync(i32, i1) -; CHECK-LABEL: .func{{.*}}vote.sync.uni -define i1 @vote.sync.uni(i32 %mask, i1 %pred) { +; CHECK-LABEL: .func{{.*}}vote_sync_uni +define i1 @vote_sync_uni(i32 %mask, i1 %pred) { ; CHECK: vote.sync.uni.pred %val = call i1 @llvm.nvvm.vote.uni.sync(i32 %mask, i1 %pred) ret i1 %val } declare i32 @llvm.nvvm.vote.ballot.sync(i32, i1) -; CHECK-LABEL: .func{{.*}}vote.sync.ballot -define i32 @vote.sync.ballot(i32 %mask, i1 %pred) { +; CHECK-LABEL: .func{{.*}}vote_sync_ballot +define i32 @vote_sync_ballot(i32 %mask, i1 %pred) { ; CHECK: vote.sync.ballot.b32 %val = call i32 @llvm.nvvm.vote.ballot.sync(i32 %mask, i1 %pred) ret i32 %val