Add an amdgcn--amdhsa target.

Summary of what the hunks below do:
  .gitignore           ignores the amdgcn--amdhsa path.
  amdgcn-amdhsa/lib    adds SOURCES plus an LLVM IR get_local_size(dim).
                       The function loads the 32-bit word at i32 index 1
                       behind llvm.amdgcn.dispatch.ptr and returns its low
                       16 bits for dim 0 and its high 16 bits for dim 1;
                       for dim 2 it returns the word at i32 index 2; any
                       other dim returns 1.
  configure.py         registers 'amdgcn--amdhsa' and appends it to
                       default_targets.

NOTE(review): this patch was recovered from a copy whose line breaks and
whitespace runs had been collapsed. Hunk headers and line counts are
unchanged, but the leading indentation and internal spacing of the
configure.py context lines are a best-effort reconstruction. Apply with
whitespace-insensitive matching (e.g. "patch -p0 -l") or regenerate the
diff against the real tree.

Index: .gitignore
===================================================================
--- .gitignore
+++ .gitignore
@@ -1,5 +1,6 @@
 Makefile
 amdgcn--
+amdgcn--amdhsa
 build/*.pyc
 built_libs/
 generic--
Index: amdgcn-amdhsa/lib/SOURCES
===================================================================
--- amdgcn-amdhsa/lib/SOURCES
+++ amdgcn-amdhsa/lib/SOURCES
@@ -0,0 +1 @@
+workitem/get_local_size.ll
Index: amdgcn-amdhsa/lib/workitem/get_local_size.ll
===================================================================
--- amdgcn-amdhsa/lib/workitem/get_local_size.ll
+++ amdgcn-amdhsa/lib/workitem/get_local_size.ll
@@ -0,0 +1,35 @@
+declare i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr() #0
+
+define i32 @get_local_size(i32 %dim) #1 {
+  %dispatch_ptr = call noalias nonnull dereferenceable(64) i8 addrspace(2)* @llvm.amdgcn.dispatch.ptr()
+  %dispatch_ptr_i32 = bitcast i8 addrspace(2)* %dispatch_ptr to i32 addrspace(2)*
+  %xy_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i64 1
+  %xy_size = load i32, i32 addrspace(2)* %xy_size_ptr, align 4, !invariant.load !0
+  switch i32 %dim, label %default [
+    i32 0, label %x_dim
+    i32 1, label %y_dim
+    i32 2, label %z_dim
+  ]
+
+x_dim:
+  %x_size = and i32 %xy_size, 65535
+  ret i32 %x_size
+
+y_dim:
+  %y_size = lshr i32 %xy_size, 16
+  ret i32 %y_size
+
+z_dim:
+  %z_size_ptr = getelementptr inbounds i32, i32 addrspace(2)* %dispatch_ptr_i32, i64 2
+  %z_size = load i32, i32 addrspace(2)* %z_size_ptr, align 4, !invariant.load !0, !range !1
+  ret i32 %z_size
+
+default:
+  ret i32 1
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { alwaysinline norecurse nounwind readonly }
+
+!0 = !{}
+!1 = !{ i32 0, i32 257 }
Index: configure.py
===================================================================
--- configure.py
+++ configure.py
@@ -102,6 +102,9 @@
   'amdgcn--': { 'devices' :
                 [{'gpu' : 'tahiti', 'aliases' : ['pitcairn', 'verde', 'oland', 'hainan', 'bonaire', 'kabini', 'kaveri', 'hawaii','mullins','tonga','carrizo','iceland','fiji','stoney'],
                  'defines' : {}} ]},
+  'amdgcn--amdhsa': { 'devices' :
+                      [{'gpu' : '', 'aliases' : ['bonaire', 'hawaii', 'kabini', 'kaveri', 'mullins', 'carrizo', 'stoney', 'fiji', 'iceland', 'tonga'],
+                       'defines' : {}} ]},
   'nvptx--' : { 'devices' : [{'gpu' : '', 'aliases' : [],
                 'defines' : {'all' : ['cl_khr_fp64']}}]},
   'nvptx64--' : { 'devices' : [{'gpu' : '', 'aliases' : [],
@@ -112,7 +115,7 @@
                 'defines' : {'all' : ['cl_khr_fp64']}}]},
 }
 
-default_targets = ['nvptx--nvidiacl', 'nvptx64--nvidiacl', 'r600--', 'amdgcn--']
+default_targets = ['nvptx--nvidiacl', 'nvptx64--nvidiacl', 'r600--', 'amdgcn--', 'amdgcn--amdhsa']
 
 targets = args
 if not targets: