Index: llvm/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/include/llvm/CodeGen/TargetLowering.h +++ llvm/include/llvm/CodeGen/TargetLowering.h @@ -1744,16 +1744,11 @@ return ""; } - /// Returns true if a cast between SrcAS and DestAS is a noop. - virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - return false; - } - /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we /// are happy to sink it into basic blocks. A cast may be free, but not /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer. virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { - return isNoopAddrSpaceCast(SrcAS, DestAS); + return TM.isNoopAddrSpaceCast(SrcAS, DestAS); } /// Return true if the pointer arguments to CI should be aligned by aligning @@ -3896,7 +3891,7 @@ /// Should SelectionDAG lower an atomic load of the given kind as a normal /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to /// eventually migrate all targets to the using LoadSDNodes, but porting is - /// being done target at a time. + /// being done target at a time. virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { assert(LI.isAtomic() && "violated precondition"); return false; Index: llvm/include/llvm/Target/TargetMachine.h =================================================================== --- llvm/include/llvm/Target/TargetMachine.h +++ llvm/include/llvm/Target/TargetMachine.h @@ -271,6 +271,11 @@ return Options.BBSectionsFuncListBuf.get(); } + /// Returns true if a cast between SrcAS and DestAS is a noop. + virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + return false; + } + /// Get a \c TargetIRAnalysis appropriate for the target. /// /// This is used to construct the new pass manager's target IR analysis pass, Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp =================================================================== --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2267,6 +2267,16 @@ } switch (CE->getOpcode()) { + case Instruction::AddrSpaceCast: { + const Constant *Op = CE->getOperand(0); + unsigned DstAS = CE->getType()->getPointerAddressSpace(); + unsigned SrcAS = Op->getType()->getPointerAddressSpace(); + if (TM.isNoopAddrSpaceCast(SrcAS, DstAS)) + return lowerConstant(Op); + + // Fallthrough to error. + LLVM_FALLTHROUGH; + } default: { // If the code isn't optimized, there may be outstanding folding // opportunities. Attempt to fold the expression using DataLayout as a Index: llvm/lib/CodeGen/CodeGenPrepare.cpp =================================================================== --- llvm/lib/CodeGen/CodeGenPrepare.cpp +++ llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -4224,7 +4224,7 @@ unsigned SrcAS = AddrInst->getOperand(0)->getType()->getPointerAddressSpace(); unsigned DestAS = AddrInst->getType()->getPointerAddressSpace(); - if (TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + if (TLI.getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS)) return matchAddr(AddrInst->getOperand(0), Depth); return false; } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -6272,7 +6272,7 @@ unsigned AS) { // Lowering memcpy / memset / memmove intrinsics to calls is only valid if all // pointer operands can be losslessly bitcasted to pointers of address space 0 - if (AS != 0 && !TLI->isNoopAddrSpaceCast(AS, 0)) { + if (AS != 0 && !TLI->getTargetMachine().isNoopAddrSpaceCast(AS, 0)) { report_fatal_error("cannot lower memory intrinsic in address space " + Twine(AS)); } Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3407,7 +3407,7 @@ unsigned SrcAS = SV->getType()->getPointerAddressSpace(); unsigned DestAS = I.getType()->getPointerAddressSpace(); - if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + if (!TM.isNoopAddrSpaceCast(SrcAS, DestAS)) N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); setValue(&I, N); Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -387,12 +387,6 @@ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - /// Returns true if a cast between SrcAS and DestAS is a noop. - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; - } - /// This method returns a target specific FastISel object, or null if the /// target does not support "fast" ISel. FastISel *createFastISel(FunctionLoweringInfo &funcInfo, Index: llvm/lib/Target/AArch64/AArch64TargetMachine.h =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetMachine.h +++ llvm/lib/Target/AArch64/AArch64TargetMachine.h @@ -49,6 +49,12 @@ return TLOF.get(); } + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Addrspacecasts are always noops. + return true; + } + private: bool isLittle; }; Index: llvm/lib/Target/AMDGPU/AMDGPU.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPU.h +++ llvm/lib/Target/AMDGPU/AMDGPU.h @@ -271,8 +271,6 @@ }; } -} // End namespace llvm - /// OpenCL uses address spaces to differentiate between /// various memory regions on the hardware. On the CPU /// all of the address spaces point to the same memory, @@ -329,4 +327,17 @@ }; } +namespace AMDGPU { + +// FIXME: Missing constant_32bit +inline bool isFlatGlobalAddrSpace(unsigned AS) { + return AS == AMDGPUAS::GLOBAL_ADDRESS || + AS == AMDGPUAS::FLAT_ADDRESS || + AS == AMDGPUAS::CONSTANT_ADDRESS || + AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; +} +} + +} // End namespace llvm + #endif Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1498,8 +1498,7 @@ const AMDGPUTargetMachine &TM = static_cast(MF.getTarget()); - const GCNSubtarget &ST = MF.getSubtarget(); - if (ST.getTargetLowering()->isNoopAddrSpaceCast(SrcAS, DestAS)) { + if (TM.isNoopAddrSpaceCast(SrcAS, DestAS)) { MI.setDesc(B.getTII().get(TargetOpcode::G_BITCAST)); return true; } @@ -2086,8 +2085,7 @@ Register CmpVal = MI.getOperand(2).getReg(); Register NewVal = MI.getOperand(3).getReg(); - assert(SITargetLowering::isFlatGlobalAddrSpace( - MRI.getType(PtrReg).getAddressSpace()) && + assert(AMDGPU::isFlatGlobalAddrSpace(MRI.getType(PtrReg).getAddressSpace()) && "this should not have been custom lowered"); LLT ValTy = MRI.getType(CmpVal); Index: llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -2952,7 +2952,7 @@ LLT PtrTy = MRI.getType(PtrReg); unsigned Size = PtrTy.getSizeInBits(); if (Subtarget.useFlatForGlobal() || - !SITargetLowering::isFlatGlobalAddrSpace(PtrTy.getAddressSpace())) + !AMDGPU::isFlatGlobalAddrSpace(PtrTy.getAddressSpace())) return AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); // If we're using MUBUF instructions for global memory, an SGPR base register @@ -2979,8 +2979,7 @@ const RegisterBank *PtrBank = getRegBank(PtrReg, MRI, *TRI); - if (PtrBank == &AMDGPU::SGPRRegBank && - SITargetLowering::isFlatGlobalAddrSpace(AS)) { + if (PtrBank == &AMDGPU::SGPRRegBank && AMDGPU::isFlatGlobalAddrSpace(AS)) { if (isScalarLoadLegal(MI)) { // We have a uniform instruction so we want to use an SMRD load ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h @@ -61,6 +61,8 @@ return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS || AddrSpace == AMDGPUAS::REGION_ADDRESS) ? -1 : 0; } + + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; }; //===----------------------------------------------------------------------===// Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -517,6 +517,12 @@ return I.get(); } +bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + return AMDGPU::isFlatGlobalAddrSpace(SrcAS) && + AMDGPU::isFlatGlobalAddrSpace(DestAS); +} + TargetTransformInfo R600TargetMachine::getTargetTransformInfo(const Function &F) { return TargetTransformInfo(R600TTIImpl(this, F)); Index: llvm/lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.h +++ llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -268,15 +268,6 @@ AS == AMDGPUAS::PRIVATE_ADDRESS; } - // FIXME: Missing constant_32bit - static bool isFlatGlobalAddrSpace(unsigned AS) { - return AS == AMDGPUAS::GLOBAL_ADDRESS || - AS == AMDGPUAS::FLAT_ADDRESS || - AS == AMDGPUAS::CONSTANT_ADDRESS || - AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; - } - - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; TargetLoweringBase::LegalizeTypeAction Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1361,11 +1361,6 @@ return MVT::Other; } -bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, - unsigned DestAS) const { - return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS); -} - bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const { const MemSDNode *MemNode = cast(N); const Value *Ptr = MemNode->getMemOperand()->getValue(); @@ -1380,7 +1375,7 @@ if (SrcAS == AMDGPUAS::FLAT_ADDRESS) return true; - return isNoopAddrSpaceCast(SrcAS, DestAS); + return getTargetMachine().isNoopAddrSpaceCast(SrcAS, DestAS); } bool SITargetLowering::isMemOpUniform(const SDNode *N) const { @@ -7988,7 +7983,7 @@ unsigned AS = AtomicNode->getAddressSpace(); // No custom lowering required for local address space - if (!isFlatGlobalAddrSpace(AS)) + if (!AMDGPU::isFlatGlobalAddrSpace(AS)) return Op; // Non-local address space requires custom lowering for atomic compare Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -509,12 +509,6 @@ const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent = false) const override; - /// Returns true if a cast between SrcAS and DestAS is a noop. - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; - } - bool shouldAlignPointerArgs(CallInst *CI, unsigned &MinSize, unsigned &PrefAlign) const override; Index: llvm/lib/Target/ARM/ARMTargetMachine.h =================================================================== --- llvm/lib/Target/ARM/ARMTargetMachine.h +++ llvm/lib/Target/ARM/ARMTargetMachine.h @@ -72,6 +72,12 @@ } bool targetSchedulesPostRAScheduling() const override { return true; }; + + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Addrspacecasts are always noops. + return true; + } }; /// ARM/Thumb little endian target machine. Index: llvm/lib/Target/Mips/MipsISelLowering.h =================================================================== --- llvm/lib/Target/Mips/MipsISelLowering.h +++ llvm/lib/Target/Mips/MipsISelLowering.h @@ -365,14 +365,6 @@ return ABI.IsN64() ? Mips::A1_64 : Mips::A1; } - /// Returns true if a cast between SrcAS and DestAS is a noop. - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Mips doesn't have any special address spaces so we just reserve - // the first 256 for software use (e.g. OpenCL) and treat casts - // between them as noops. - return SrcAS < 256 && DestAS < 256; - } - bool isJumpTableRelative() const override { return getTargetMachine().isPositionIndependent(); } Index: llvm/lib/Target/Mips/MipsTargetMachine.h =================================================================== --- llvm/lib/Target/Mips/MipsTargetMachine.h +++ llvm/lib/Target/Mips/MipsTargetMachine.h @@ -63,6 +63,14 @@ return TLOF.get(); } + /// Returns true if a cast between SrcAS and DestAS is a noop. + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Mips doesn't have any special address spaces so we just reserve + // the first 256 for software use (e.g. OpenCL) and treat casts + // between them as noops. + return SrcAS < 256 && DestAS < 256; + } + bool isLittleEndian() const { return isLittle; } const MipsABIInfo &getABI() const { return ABI; } }; Index: llvm/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1007,11 +1007,6 @@ } }; - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { - // Addrspacecasts are always noops. - return true; - } - bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI, SelectionDAG &DAG, ISD::LoadExtType ET = ISD::NON_EXTLOAD) const; Index: llvm/lib/Target/PowerPC/PPCTargetMachine.h =================================================================== --- llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -58,6 +58,11 @@ const Triple &TT = getTargetTriple(); return (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le); }; + + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override { + // Addrspacecasts are always noops. + return true; + } }; } // end namespace llvm Index: llvm/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.h +++ llvm/lib/Target/X86/X86ISelLowering.h @@ -1218,8 +1218,6 @@ Align Alignment, SelectionDAG &DAG) const; - bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; - /// Customize the preferred legalization strategy for certain types. LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; Index: llvm/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/lib/Target/X86/X86ISelLowering.cpp +++ llvm/lib/Target/X86/X86ISelLowering.cpp @@ -2612,17 +2612,6 @@ return TargetLowering::getSafeStackPointerLocation(IRB); } -bool X86TargetLowering::isNoopAddrSpaceCast(unsigned SrcAS, - unsigned DestAS) const { - assert(SrcAS != DestAS && "Expected different address spaces!"); - - const TargetMachine &TM = getTargetMachine(); - if (TM.getPointerSize(SrcAS) != TM.getPointerSize(DestAS)) - return false; - - return SrcAS < 256 && DestAS < 256; -} - //===----------------------------------------------------------------------===// // Return Value Calling Convention Implementation //===----------------------------------------------------------------------===// Index: llvm/lib/Target/X86/X86TargetMachine.h =================================================================== --- llvm/lib/Target/X86/X86TargetMachine.h +++ llvm/lib/Target/X86/X86TargetMachine.h @@ -52,6 +52,12 @@ TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + bool isMachineVerifierClean() const override { + return false; + } + + bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override; }; } // end namespace llvm Index: llvm/lib/Target/X86/X86TargetMachine.cpp =================================================================== --- llvm/lib/Target/X86/X86TargetMachine.cpp +++ llvm/lib/Target/X86/X86TargetMachine.cpp @@ -309,6 +309,14 @@ return I.get(); } +bool X86TargetMachine::isNoopAddrSpaceCast(unsigned SrcAS, + unsigned DestAS) const { + assert(SrcAS != DestAS && "Expected different address spaces!"); + if (getPointerSize(SrcAS) != getPointerSize(DestAS)) + return false; + return SrcAS < 256 && DestAS < 256; +} + //===----------------------------------------------------------------------===// // Command line options for x86 //===----------------------------------------------------------------------===// Index: llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/addrspacecast-initializer-unsupported.ll @@ -0,0 +1,7 @@ +; RUN: not --crash llc -march=amdgcn -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s + +; ERROR: LLVM ERROR: Unsupported expression in static initializer: addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*) + +@lds.arr = unnamed_addr addrspace(3) global [256 x i32] undef, align 4 + +@gv_flatptr_from_lds = unnamed_addr addrspace(2) global i32 addrspace(4)* getelementptr ([256 x i32], [256 x i32] addrspace(4)* addrspacecast ([256 x i32] addrspace(3)* @lds.arr to [256 x i32] addrspace(4)*), i64 0, i64 8), align 4 Index: llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/addrspacecast-initializer.ll @@ -0,0 +1,27 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s + +; CHECK: global.arr: +; CHECK: .zero 1024 +; CHECK: .size global.arr, 1024 + +; CHECK: gv_flatptr_from_global: +; CHECK: .quad global.arr+32 +; CHECK: .size gv_flatptr_from_global, 8 + +; CHECK: gv_global_ptr: +; CHECK: .quad global.arr+32 +; CHECK: .size gv_global_ptr, 8 + +; CHECK: gv_flatptr_from_constant: +; CHECK: .quad constant.arr+32 +; CHECK: .size gv_flatptr_from_constant, 8 + +@global.arr = unnamed_addr addrspace(1) global [256 x i32] undef, align 4 +@constant.arr = external unnamed_addr addrspace(4) global [256 x i32], align 4 + +@gv_flatptr_from_global = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(1)* @global.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4 + + +@gv_global_ptr = unnamed_addr addrspace(4) global i32 addrspace(1)* getelementptr ([256 x i32], [256 x i32] addrspace(1)* @global.arr, i64 0, i64 8), align 4 + +@gv_flatptr_from_constant = unnamed_addr addrspace(4) global i32 addrspace(0)* getelementptr ([256 x i32], [256 x i32] addrspace(0)* addrspacecast ([256 x i32] addrspace(4)* @constant.arr to [256 x i32] addrspace(0)*), i64 0, i64 8), align 4