Index: lib/Target/AMDGPU/AMDGPU.h =================================================================== --- lib/Target/AMDGPU/AMDGPU.h +++ lib/Target/AMDGPU/AMDGPU.h @@ -224,6 +224,9 @@ GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). CONSTANT_ADDRESS = 2, ///< Address space for constant memory (VTX2) LOCAL_ADDRESS = 3, ///< Address space for local memory. + + CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory + /// Address space for direct addressible parameter memory (CONST0) PARAM_D_ADDRESS = 6, /// Address space for indirect addressible parameter memory (VTX1) Index: lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -272,10 +272,10 @@ // flat. if (TT.getEnvironmentName() == "amdgiz" || TT.getEnvironmentName() == "amdgizcl") - return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32" + return "e-p:64:64-p1:64:64-p2:64:64-p3:32:32-p4:32:32-p5:32:32-p6:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-A5"; - return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" + return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p6:32:32" "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; } Index: lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- lib/Target/AMDGPU/SIISelLowering.cpp +++ lib/Target/AMDGPU/SIISelLowering.cpp @@ -301,10 +301,8 @@ setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i64, Expand); - if (getSubtarget()->hasFlatAddressSpace()) { - setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); - setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); - } + 
setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); + setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); setOperationAction(ISD::BSWAP, MVT::i32, Legal); setOperationAction(ISD::BITREVERSE, MVT::i32, Legal); @@ -890,7 +888,8 @@ if (AS == AMDGPUASI.GLOBAL_ADDRESS) return isLegalGlobalAddressingMode(AM); - if (AS == AMDGPUASI.CONSTANT_ADDRESS) { + if (AS == AMDGPUASI.CONSTANT_ADDRESS || + AS == AMDGPUASI.CONSTANT_ADDRESS_32BIT) { // If the offset isn't a multiple of 4, it probably isn't going to be // correctly aligned. // FIXME: Can we get the real alignment here? @@ -3910,6 +3909,15 @@ } } + // constant 32bit -> constant: zero-extend the 32-bit pointer to 64 bits. + if (ASC->getSrcAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS_32BIT && + ASC->getDestAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS) { + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, Src, + DAG.getConstant(0, SL, MVT::i32)); + + return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); + } + // global <-> flat are no-ops and never emitted. const MachineFunction &MF = DAG.getMachineFunction(); Index: test/CodeGen/AMDGPU/constant-address-space-32bit.ll =================================================================== --- /dev/null +++ test/CodeGen/AMDGPU/constant-address-space-32bit.ll @@ -0,0 +1,26 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SICI %s +; RUN: llc -march=amdgcn -mcpu=bonaire < %s | FileCheck -check-prefixes=GCN,SICI %s +; RUN: llc -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VIGFX9 %s +; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,VIGFX9 %s + +; GCN-LABEL: {{^}}const_addrspace: +; GCN: s_mov_b32 s2, s1 +; GCN: s_mov_b32 s1, 0 +; GCN: s_mov_b32 s3, s1 +; SICI: s_load_dword s{{[0-9]}}, s[0:1], 0x1 +; SICI: s_load_dword s{{[0-9]}}, s[2:3], 0x2 +; VIGFX9: s_load_dword s{{[0-9]}}, s[0:1], 0x4 +; VIGFX9: s_load_dword s{{[0-9]}}, s[2:3], 0x8 +define amdgpu_vs float @const_addrspace(i32 addrspace(6)* inreg %p0, i32 addrspace(6)* inreg %p1) 
#0 { + %ptr0 = addrspacecast i32 addrspace(6)* %p0 to i32 addrspace(2)* + %ptr1 = addrspacecast i32 addrspace(6)* %p1 to i32 addrspace(2)* + %gep0 = getelementptr i32, i32 addrspace(2)* %ptr0, i64 1 + %gep1 = getelementptr i32, i32 addrspace(2)* %ptr1, i64 2 + %r0 = load i32, i32 addrspace(2)* %gep0 + %r1 = load i32, i32 addrspace(2)* %gep1 + %r = add i32 %r0, %r1 + %r2 = bitcast i32 %r to float + ret float %r2 +} + +attributes #0 = { nounwind }