diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -387,6 +387,8 @@
   bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;

+  Optional<unsigned> getAssumedAddrSpace(const Value *V) const;
+
   /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
   /// NewV, which has a different address space. This should happen for every
   /// operand index that collectFlatAddressOperands returned for the intrinsic.
@@ -1384,6 +1386,7 @@
   virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                           Intrinsic::ID IID) const = 0;
   virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
+  virtual Optional<unsigned> getAssumedAddrSpace(const Value *V) const = 0;
   virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
                                                   Value *OldV,
                                                   Value *NewV) const = 0;
@@ -1677,6 +1680,10 @@
     return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
   }

+  Optional<unsigned> getAssumedAddrSpace(const Value *V) const override {
+    return Impl.getAssumedAddrSpace(V);
+  }
+
   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                           Value *NewV) const override {
     return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -89,6 +89,8 @@
   bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }

+  Optional<unsigned> getAssumedAddrSpace(const Value *V) const { return None; }
+
   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                           Value *NewV) const {
     return nullptr;
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -224,6 +224,10 @@
     return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
   }

+  Optional<unsigned> getAssumedAddrSpace(const Value *V) const {
+    return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
+  }
+
   Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                           Value *NewV) const {
     return nullptr;
diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -284,6 +284,16 @@
     return false;
   }

+  /// If the specified generic pointer could be assumed as a pointer to a
+  /// specific address space, return that address space.
+  ///
+  /// Under offloading programming, the offloading target may be passed with
+  /// values only prepared on the host side and could assume certain
+  /// properties.
+  virtual Optional<unsigned> getAssumedAddrSpace(const Value *V) const {
+    return None;
+  }
+
   /// Get a \c TargetIRAnalysis appropriate for the target.
   ///
   /// This is used to construct the new pass manager's target IR analysis pass,
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -297,6 +297,11 @@
   return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
 }

+Optional<unsigned>
+TargetTransformInfo::getAssumedAddrSpace(const Value *V) const {
+  return TTIImpl->getAssumedAddrSpace(V);
+}
+
 Value *TargetTransformInfo::rewriteIntrinsicWithAddressSpace(
     IntrinsicInst *II, Value *OldV, Value *NewV) const {
   return TTIImpl->rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -64,6 +64,8 @@
   }

   bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+
+  Optional<unsigned> getAssumedAddrSpace(const Value *V) const override;
 };

 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -527,6 +527,25 @@
          AMDGPU::isFlatGlobalAddrSpace(DestAS);
 }

+Optional<unsigned>
+AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
+  auto *Ty = V->getType();
+  if (!Ty->isPointerTy() ||
+      Ty->getPointerAddressSpace() != AMDGPUAS::FLAT_ADDRESS)
+    return None;
+  const auto *LD = dyn_cast<LoadInst>(V);
+  if (!LD)
+    return None;
+  const auto *Ptr = LD->getPointerOperand();
+  if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
+    return None;
+  // For a generic pointer loaded from the constant memory, it could be assumed
+  // as a global pointer since the constant memory is only populated on the
+  // host side. As implied by the offload programming model, only global
+  // pointers could be referenced on the host side.
+  return AMDGPUAS::GLOBAL_ADDRESS;
+}
+
 TargetTransformInfo
 R600TargetMachine::getTargetTransformInfo(const Function &F) {
   return TargetTransformInfo(R600TTIImpl(this, F));
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -286,6 +286,8 @@
   case Instruction::IntToPtr:
     return isNoopPtrIntCastPair(Op, DL, TTI);
   default:
+    if (TTI->getAssumedAddrSpace(&V))
+      return true;
     return false;
   }
 }
@@ -478,9 +480,12 @@
     }
     // Otherwise, adds its operands to the stack and explores them.
     PostorderStack.back().setInt(true);
-    for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
-      appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
-                                                   Visited);
+    // Skip values with an assumed address space.
+    if (!TTI->getAssumedAddrSpace(TopVal)) {
+      for (Value *PtrOperand : getPointerOperands(*TopVal, *DL, TTI)) {
+        appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
+                                                     Visited);
+      }
     }
   }
   return Postorder;
 }
@@ -555,6 +560,16 @@
     return nullptr;
   }

+  if (auto AS = TTI->getAssumedAddrSpace(I)) {
+    // For the assumed address space, insert an `addrspacecast` to make that
+    // explicit.
+    auto *NewPtrTy =
+        I->getType()->getPointerElementType()->getPointerTo(AS.getValue());
+    auto *NewI = new AddrSpaceCastInst(I, NewPtrTy);
+    NewI->insertAfter(I);
+    return NewI;
+  }
+
   // Computes the converted pointer operands.
   SmallVector<Value *, 4> NewPointerOperands;
   for (const Use &OperandUse : I->operands()) {
@@ -848,15 +863,23 @@
       else
         NewAS = joinAddressSpaces(Src0AS, Src1AS);
     } else {
-      for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
-        auto I = InferredAddrSpace.find(PtrOperand);
-        unsigned OperandAS = I != InferredAddrSpace.end() ?
-          I->second : PtrOperand->getType()->getPointerAddressSpace();
-
-        // join(flat, *) = flat. So we can break if NewAS is already flat.
-        NewAS = joinAddressSpaces(NewAS, OperandAS);
-        if (NewAS == FlatAddrSpace)
-          break;
+      if (auto AS = TTI->getAssumedAddrSpace(&V)) {
+        // Use the assumed address space directly.
+        NewAS = AS.getValue();
+      } else {
+        // Otherwise, infer the address space from its pointer operands.
+        for (Value *PtrOperand : getPointerOperands(V, *DL, TTI)) {
+          auto I = InferredAddrSpace.find(PtrOperand);
+          unsigned OperandAS =
+              I != InferredAddrSpace.end()
+                  ? I->second
+                  : PtrOperand->getType()->getPointerAddressSpace();
+
+          // join(flat, *) = flat. So we can break if NewAS is already flat.
+          NewAS = joinAddressSpaces(NewAS, OperandAS);
+          if (NewAS == FlatAddrSpace)
+            break;
+        }
       }
     }

@@ -1068,6 +1091,9 @@
     }

     User *CurUser = U.getUser();
+    // Skip if the current user is the new value itself.
+    if (CurUser == NewV)
+      continue;
     // Handle more complex cases like intrinsic that need to be remangled.
     if (auto *MI = dyn_cast<MemIntrinsic>(CurUser)) {
       if (!MI->isVolatile() && handleMemIntrinsicPtrUse(MI, V, NewV))
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -138,7 +138,7 @@
 ; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
 ; CHECK-NEXT:    s_and_b32 s4, s4, 1
 ; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
-; CHECK-NEXT:    s_cbranch_scc1 BB4_6
+; CHECK-NEXT:    s_cbranch_scc1 BB4_4
 ; CHECK-NEXT:  ; %bb.1: ; %bb2
 ; CHECK-NEXT:    s_getpc_b64 s[6:7]
 ; CHECK-NEXT:    s_add_u32 s6, s6, const.ptr@gotpcrel32@lo+4
@@ -150,23 +150,23 @@
 ; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
 ; CHECK-NEXT:    v_mov_b32_e32 v0, s6
 ; CHECK-NEXT:    v_mov_b32_e32 v1, s7
-; CHECK-NEXT:    flat_load_dword v0, v[0:1]
-; CHECK-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, 1.0, v0
-; CHECK-NEXT:    s_and_saveexec_b64 s[6:7], vcc
+; CHECK-NEXT:    global_load_dword v0, v[0:1], off
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_cmp_gt_f32_e32 vcc, 1.0, v0
+; CHECK-NEXT:    s_cbranch_vccnz BB4_3
 ; CHECK-NEXT:  ; %bb.2: ; %bb7
 ; CHECK-NEXT:    s_mov_b32 s4, 0
-; CHECK-NEXT:  ; %bb.3: ; %bb8
-; CHECK-NEXT:    s_or_b64 exec, exec, s[6:7]
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[6:7], s4, 0
-; CHECK-NEXT:    s_and_saveexec_b64 s[4:5], s[6:7]
-; CHECK-NEXT:    s_cbranch_execz BB4_5
-; CHECK-NEXT:  ; %bb.4: ; %bb11
+; CHECK-NEXT:  BB4_3: ; %bb8
+; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
+; CHECK-NEXT:    s_cselect_b32 s4, 1, 0
+; CHECK-NEXT:    s_and_b32 s4, s4, 1
+; CHECK-NEXT:    s_cmp_lg_u32 s4, 0
+; CHECK-NEXT:    s_cbranch_scc0 BB4_5
+; CHECK-NEXT:  BB4_4: ; %bb12
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:  BB4_5: ; %bb11
 ; CHECK-NEXT:    v_mov_b32_e32 v0, 4.0
 ; CHECK-NEXT:    buffer_store_dword v0, v0, s[0:3], 0 offen
-; CHECK-NEXT:  BB4_5: ; %Flow
-; CHECK-NEXT:    s_or_b64 exec, exec, s[4:5]
-; CHECK-NEXT:  BB4_6: ; %bb12
 ; CHECK-NEXT:    s_waitcnt vmcnt(0)
 ; CHECK-NEXT:    s_setpc_b64 s[30:31]
 bb:
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/assumed-addrspace.ll
@@ -0,0 +1,12 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces -o - %s | FileCheck %s
+
+@c0 = addrspace(4) global float* undef
+
+; CHECK-LABEL: @generic_ptr_from_constant
+; CHECK: addrspacecast float* %p to float addrspace(1)*
+; CHECK-NEXT: load float, float addrspace(1)*
+define float @generic_ptr_from_constant() {
+  %p = load float*, float* addrspace(4)* @c0
+  %v = load float, float* %p
+  ret float %v
+}
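
Illustration (not part of the patch): a minimal IR sketch of how the assumed address space is expected to propagate once the loaded generic pointer has further pointer arithmetic on it. The function name, global name, and value names below are hypothetical; only the rewrite pattern follows from the code above, where the assumed load receives an explicit addrspacecast to addrspace(1) and its users are then inferred as global.

; Hypothetical input, assuming the same amdgcn-amd-amdhsa triple and
; -infer-address-spaces invocation as the test above.
@g0 = addrspace(4) global float* undef

define float @generic_ptr_through_gep(i64 %i) {
  %p = load float*, float* addrspace(4)* @g0
  %q = getelementptr inbounds float, float* %p, i64 %i
  %v = load float, float* %q
  ret float %v
}

; Expected output (sketch): an addrspacecast of %p to float addrspace(1)* is
; inserted after the load, and the GEP and the final load are rewritten to
; operate on float addrspace(1)* values, so the access can be selected as a
; global load instead of a flat one.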