Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -1873,31 +1873,9 @@
     return true;
   }
 
-  if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
-    // Truncate.
-    B.buildExtract(Dst, Src, 0);
-    MI.eraseFromParent();
-    return true;
-  }
-
-  if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) {
-    const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
-    uint32_t AddrHiVal = Info->get32BitAddressHighBits();
-
-    // FIXME: This is a bit ugly due to creating a merge of 2 pointers to
-    // another. Merge operands are required to be the same type, but creating an
-    // extra ptrtoint would be kind of pointless.
-    auto HighAddr = B.buildConstant(
-        LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal);
-    B.buildMerge(Dst, {Src, HighAddr});
-    MI.eraseFromParent();
-    return true;
-  }
-
-  if (SrcAS == AMDGPUAS::FLAT_ADDRESS) {
-    assert(DestAS == AMDGPUAS::LOCAL_ADDRESS ||
-           DestAS == AMDGPUAS::PRIVATE_ADDRESS);
-
+  if (SrcAS == AMDGPUAS::FLAT_ADDRESS &&
+      (DestAS == AMDGPUAS::LOCAL_ADDRESS ||
+       DestAS == AMDGPUAS::PRIVATE_ADDRESS)) {
     if (isKnownNonNull(Src, MRI, TM, SrcAS)) {
       // Extract low 32-bits of the pointer.
       B.buildExtract(Dst, Src, 0);
@@ -1921,37 +1899,70 @@
     return true;
   }
 
-  if (SrcAS != AMDGPUAS::LOCAL_ADDRESS && SrcAS != AMDGPUAS::PRIVATE_ADDRESS)
-    return false;
+  if (DestAS == AMDGPUAS::FLAT_ADDRESS &&
+      (SrcAS == AMDGPUAS::LOCAL_ADDRESS ||
+       SrcAS == AMDGPUAS::PRIVATE_ADDRESS)) {
+    if (!ST.hasFlatAddressSpace())
+      return false;
 
-  if (!ST.hasFlatAddressSpace())
-    return false;
+    Register ApertureReg = getSegmentAperture(SrcAS, MRI, B);
+    if (!ApertureReg.isValid())
+      return false;
 
-  Register ApertureReg = getSegmentAperture(SrcAS, MRI, B);
-  if (!ApertureReg.isValid())
-    return false;
+    // Coerce the type of the low half of the result so we can use merge_values.
+    Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
 
-  // Coerce the type of the low half of the result so we can use merge_values.
-  Register SrcAsInt = B.buildPtrToInt(S32, Src).getReg(0);
+    // TODO: Should we allow mismatched types but matching sizes in merges to
+    // avoid the ptrtoint?
+    auto BuildPtr = B.buildMerge(DstTy, {SrcAsInt, ApertureReg});
 
-  // TODO: Should we allow mismatched types but matching sizes in merges to
-  // avoid the ptrtoint?
-  auto BuildPtr = B.buildMerge(DstTy, {SrcAsInt, ApertureReg});
+    if (isKnownNonNull(Src, MRI, TM, SrcAS)) {
+      B.buildCopy(Dst, BuildPtr);
+      MI.eraseFromParent();
+      return true;
+    }
+
+    auto SegmentNull = B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
+    auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
+
+    auto CmpRes = B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src,
+                              SegmentNull.getReg(0));
+
+    B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull);
 
-  if (isKnownNonNull(Src, MRI, TM, SrcAS)) {
-    B.buildCopy(Dst, BuildPtr);
     MI.eraseFromParent();
     return true;
   }
 
-  auto SegmentNull = B.buildConstant(SrcTy, TM.getNullPointerValue(SrcAS));
-  auto FlatNull = B.buildConstant(DstTy, TM.getNullPointerValue(DestAS));
+  if (DestAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
+      SrcTy.getSizeInBits() == 64) {
+    // Truncate.
+    B.buildExtract(Dst, Src, 0);
+    MI.eraseFromParent();
+    return true;
+  }
 
-  auto CmpRes =
-      B.buildICmp(CmpInst::ICMP_NE, LLT::scalar(1), Src, SegmentNull.getReg(0));
+  if (SrcAS == AMDGPUAS::CONSTANT_ADDRESS_32BIT &&
+      DstTy.getSizeInBits() == 64) {
+    const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+    uint32_t AddrHiVal = Info->get32BitAddressHighBits();
+
+    // FIXME: This is a bit ugly due to creating a merge of 2 pointers to
+    // another. Merge operands are required to be the same type, but creating an
+    // extra ptrtoint would be kind of pointless.
+    auto HighAddr = B.buildConstant(
+        LLT::pointer(AMDGPUAS::CONSTANT_ADDRESS_32BIT, 32), AddrHiVal);
+    B.buildMerge(Dst, {Src, HighAddr});
+    MI.eraseFromParent();
+    return true;
+  }
 
-  B.buildSelect(Dst, CmpRes, BuildPtr, FlatNull);
+  DiagnosticInfoUnsupported InvalidAddrSpaceCast(
+      MF.getFunction(), "invalid addrspacecast", B.getDebugLoc());
+  LLVMContext &Ctx = MF.getFunction().getContext();
+  Ctx.diagnose(InvalidAddrSpaceCast);
+  B.buildUndef(Dst);
   MI.eraseFromParent();
   return true;
 }
Index: llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll
+++ llvm/test/CodeGen/AMDGPU/invalid-addrspacecast.ll
@@ -1,4 +1,5 @@
-; RUN: not llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s 2>&1 | FileCheck -check-prefix=ERROR %s
+; RUN: not llc -global-isel=0 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s 2>&1 | FileCheck -check-prefix=ERROR %s
+; RUN: not llc -global-isel=1 -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca < %s 2>&1 | FileCheck -check-prefix=ERROR %s
 
 ; ERROR: error: <unknown>:0:0: in function use_group_to_global_addrspacecast void (i32 addrspace(3)*): invalid addrspacecast
 define amdgpu_kernel void @use_group_to_global_addrspacecast(i32 addrspace(3)* %ptr) {
@@ -8,8 +9,29 @@
 }
 
 ; ERROR: error: <unknown>:0:0: in function use_local_to_constant32bit_addrspacecast void (i32 addrspace(3)*): invalid addrspacecast
-define amdgpu_kernel void @use_local_to_constant32bit_addrspacecast(i32 addrspace(3)* %ptr) #0 {
+define amdgpu_kernel void @use_local_to_constant32bit_addrspacecast(i32 addrspace(3)* %ptr) {
   %stof = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(6)*
   %load = load volatile i32, i32 addrspace(6)* %stof
   ret void
 }
+
+; ERROR: error: <unknown>:0:0: in function use_constant32bit_to_local_addrspacecast void (i32 addrspace(6)*): invalid addrspacecast
+define amdgpu_kernel void @use_constant32bit_to_local_addrspacecast(i32 addrspace(6)* %ptr) {
+  %cast = addrspacecast i32 addrspace(6)* %ptr to i32 addrspace(3)*
+  %load = load volatile i32, i32 addrspace(3)* %cast
+  ret void
+}
+
+; ERROR: error: <unknown>:0:0: in function use_local_to_42_addrspacecast void (i32 addrspace(3)*): invalid addrspacecast
+define amdgpu_kernel void @use_local_to_42_addrspacecast(i32 addrspace(3)* %ptr) {
+  %cast = addrspacecast i32 addrspace(3)* %ptr to i32 addrspace(42)*
+  store volatile i32 addrspace(42)* %cast, i32 addrspace(42)* addrspace(1)* null
+  ret void
+}
+
+; ERROR: error: <unknown>:0:0: in function use_42_to_local_addrspacecast void (i32 addrspace(42)*): invalid addrspacecast
+define amdgpu_kernel void @use_42_to_local_addrspacecast(i32 addrspace(42)* %ptr) {
+  %cast = addrspacecast i32 addrspace(42)* %ptr to i32 addrspace(3)*
+  %load = load volatile i32, i32 addrspace(3)* %cast
+  ret void
+}