Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -4942,6 +4942,10 @@ } } + if (ASC->getDestAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT && + Src.getValueType() == MVT::i64) + return DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Src); + // global <-> flat are no-ops and never emitted. const MachineFunction &MF = DAG.getMachineFunction(); Index: llvm/test/CodeGen/AMDGPU/addrspacecast.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/addrspacecast.ll +++ llvm/test/CodeGen/AMDGPU/addrspacecast.ll @@ -290,6 +290,38 @@ ret void } +; HSA-LABEL: {{^}}use_constant_to_constant32_addrspacecast +; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}} +; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}} +; GFX9: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}, [[PTRPTR]], 0x0{{$}} +; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}} +; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]] +; GFX9: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x0{{$}} +define amdgpu_kernel void @use_constant_to_constant32_addrspacecast(i8 addrspace(4)* addrspace(4)* %ptr.ptr, i32 %offset) #0 { + %ptr = load volatile i8 addrspace(4)*, i8 addrspace(4)* addrspace(4)* %ptr.ptr + %addrspacecast = addrspacecast i8 addrspace(4)* %ptr to i8 addrspace(6)* + %gep = getelementptr i8, i8 addrspace(6)* %addrspacecast, i32 %offset + %ptr.cast = bitcast i8 addrspace(6)* %gep to i32 addrspace(6)* + %load = load volatile i32, i32 addrspace(6)* %ptr.cast, align 4 + ret void +} + +; HSA-LABEL: {{^}}use_global_to_constant32_addrspacecast +; GFX9: s_load_dwordx2 [[PTRPTR:s\[[0-9]+:[0-9]+\]]], s[4:5], 0x0{{$}} +; GFX9: s_load_dword [[OFFSET:s[0-9]+]], s[4:5], 0x8{{$}} +; GFX9: s_load_dwordx2 s{{\[}}[[PTR_LO:[0-9]+]]:[[PTR_HI:[0-9]+]]{{\]}}, [[PTRPTR]], 0x0{{$}} +; GFX9: s_mov_b32 s[[PTR_HI]], 0{{$}} +; GFX9: s_add_i32 s[[PTR_LO]], s[[PTR_LO]], [[OFFSET]] +; GFX9: s_load_dword s{{[0-9]+}}, s{{\[}}[[PTR_LO]]:[[PTR_HI]]{{\]}}, 0x0{{$}} +define amdgpu_kernel void @use_global_to_constant32_addrspacecast(i8 addrspace(1)* addrspace(4)* %ptr.ptr, i32 %offset) #0 { + %ptr = load volatile i8 addrspace(1)*, i8 addrspace(1)* addrspace(4)* %ptr.ptr + %addrspacecast = addrspacecast i8 addrspace(1)* %ptr to i8 addrspace(6)* + %gep = getelementptr i8, i8 addrspace(6)* %addrspacecast, i32 %offset + %ptr.cast = bitcast i8 addrspace(6)* %gep to i32 addrspace(6)* + %load = load volatile i32, i32 addrspace(6)* %ptr.cast, align 4 + ret void +} + declare void @llvm.amdgcn.s.barrier() #1 declare i32 @llvm.amdgcn.workitem.id.x() #2 Index: llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll =================================================================== --- llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll +++ llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll @@ -13,3 +13,10 @@ store volatile i32 7, i32* %stof ret void } + +; ERROR: error: :0:0: in function use_local_to_constant32bit_addrspacecast void (i32 addrspace(3)*): invalid addrspacecast +define amdgpu_kernel void @use_local_to_constant32bit_addrspacecast(i32 addrspace(3)* %ptr) #0 { + %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(6)* + %load = load volatile i32, i32 addrspace(6)* %stof + ret void +}