Index: lib/Transforms/Scalar/InferAddressSpaces.cpp
===================================================================
--- lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -141,6 +141,8 @@
   void inferAddressSpaces(const std::vector<Value *> &Postorder,
                           ValueToAddrSpaceMapTy *InferredAddrSpace) const;
 
+  bool handleComplexPtrUse(User &U, Value *OldV, Value *NewV) const;
+
   // Changes the generic address expressions in function F to point to specific
   // address spaces if InferredAddrSpace says so. Postorder is the postorder of
   // all generic address expressions in the use-def graph of function F.
@@ -153,6 +155,13 @@
     Value *V, std::vector<std::pair<Value *, bool>> *PostorderStack,
     DenseSet<Value *> *Visited) const;
 
+  bool rewriteIntrinsicOperands(IntrinsicInst *II,
+                                Value *OldV, Value *NewV) const;
+  void collectRewritableIntrinsicOperands(
+    IntrinsicInst *II,
+    std::vector<std::pair<Value *, bool>> *PostorderStack,
+    DenseSet<Value *> *Visited) const;
+
   std::vector<Value *> collectGenericAddressExpressions(Function &F) const;
   Value *cloneValueWithNewAddressSpace(
     Value *V, unsigned NewAddrSpace,
@@ -210,6 +219,47 @@
   }
 }
 
+// TODO: Move logic to TTI?
+bool InferAddressSpaces::rewriteIntrinsicOperands(IntrinsicInst *II,
+                                                  Value *OldV,
+                                                  Value *NewV) const {
+  Module *M = II->getParent()->getParent()->getParent();
+
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::objectsize:
+  case Intrinsic::amdgcn_atomic_inc:
+  case Intrinsic::amdgcn_atomic_dec: {
+    Type *DestTy = II->getType();
+    Type *SrcTy = NewV->getType();
+    Function *NewDecl
+      = Intrinsic::getDeclaration(M, II->getIntrinsicID(), { DestTy, SrcTy });
+    II->setArgOperand(0, NewV);
+    II->setCalledFunction(NewDecl);
+    return true;
+  }
+  default:
+    return false;
+  }
+}
+
+// TODO: Move logic to TTI?
+void InferAddressSpaces::collectRewritableIntrinsicOperands(
+  IntrinsicInst *II,
+  std::vector<std::pair<Value *, bool>> *PostorderStack,
+  DenseSet<Value *> *Visited) const {
+  switch (II->getIntrinsicID()) {
+  case Intrinsic::objectsize:
+  case Intrinsic::amdgcn_atomic_inc:
+  case Intrinsic::amdgcn_atomic_dec:
+    appendsGenericAddressExpressionToPostorderStack(
+      II->getArgOperand(0), PostorderStack, Visited);
+    break;
+  default:
+    break;
+  }
+}
+
+// Returns all generic address expressions in function F. The elements are
 // If V is an unvisited generic address expression, appends V to PostorderStack
 // and marks it as visited.
 void InferAddressSpaces::appendsGenericAddressExpressionToPostorderStack(
@@ -223,7 +273,6 @@
   }
 }
 
-// Returns all generic address expressions in function F. The elements are
 // ordered in postorder.
 std::vector<Value *>
 InferAddressSpaces::collectGenericAddressExpressions(Function &F) const {
@@ -257,9 +306,9 @@
         appendsGenericAddressExpressionToPostorderStack(
           MTI->getRawSource(), &PostorderStack, &Visited);
       }
+    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
+      collectRewritableIntrinsicOperands(II, &PostorderStack, &Visited);
     }
-
-    // TODO: Support target intrinsics
   }
 
   std::vector<Value *> Postorder; // The resultant postorder.
@@ -568,7 +617,8 @@
 /// Handle updating uses that require more complex processing than simply
 /// replacing an operand. e.g. intrinsic uses that need to be re-mangled.
 /// \returns true on success but does not remove the user instruction \p U.
-static bool handleComplexPtrUse(User &U, Value *OldV, Value *NewV) {
+bool InferAddressSpaces::handleComplexPtrUse(User &U,
+                                             Value *OldV, Value *NewV) const {
   if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(&U)) {
     if (MI->isVolatile())
       return false;
@@ -612,6 +662,9 @@
     return true;
   }
 
+  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&U))
+    return rewriteIntrinsicOperands(II, OldV, NewV);
+
   return false;
 }
 
Index: test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
===================================================================
--- /dev/null
+++ test/Transforms/InferAddressSpaces/AMDGPU/intrinsics.ll
@@ -0,0 +1,92 @@
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s
+
+; CHECK-LABEL: @objectsize_group_to_flat_i32(
+; CHECK: %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true)
+define i32 @objectsize_group_to_flat_i32(i8 addrspace(3)* %group.ptr) #0 {
+  %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
+  %val = call i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)* %cast, i1 true)
+  ret i32 %val
+}
+
+; CHECK-LABEL: @objectsize_global_to_flat_i64(
+; CHECK: %val = call i64 @llvm.objectsize.i64.p3i8(i8 addrspace(3)* %global.ptr, i1 true)
+define i64 @objectsize_global_to_flat_i64(i8 addrspace(3)* %global.ptr) #0 {
+  %cast = addrspacecast i8 addrspace(3)* %global.ptr to i8 addrspace(4)*
+  %val = call i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)* %cast, i1 true)
+  ret i64 %val
+}
+
+; CHECK-LABEL: @atomicinc_global_to_flat_i32(
+; CHECK: call i32 @llvm.amdgcn.atomic.inc.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %y)
+define i32 @atomicinc_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y)
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicinc_group_to_flat_i32(
+; CHECK: %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %y)
+define i32 @atomicinc_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = call i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* %cast, i32 %y)
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicinc_global_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y)
+define i64 @atomicinc_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
+  %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
+  %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+  ret i64 %ret
+}
+
+; CHECK-LABEL: @atomicinc_group_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.inc.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y)
+define i64 @atomicinc_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
+  %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
+  %ret = call i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+  ret i64 %ret
+}
+
+; CHECK-LABEL: @atomicdec_global_to_flat_i32(
+; CHECK: call i32 @llvm.amdgcn.atomic.dec.i32.p1i32(i32 addrspace(1)* %global.ptr, i32 %val)
+define i32 @atomicdec_global_to_flat_i32(i32 addrspace(1)* %global.ptr, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val)
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicdec_group_to_flat_i32(
+; CHECK: %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p3i32(i32 addrspace(3)* %group.ptr, i32 %val)
+define i32 @atomicdec_group_to_flat_i32(i32 addrspace(3)* %group.ptr, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = call i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* %cast, i32 %val)
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicdec_global_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p1i64(i64 addrspace(1)* %global.ptr, i64 %y)
+define i64 @atomicdec_global_to_flat_i64(i64 addrspace(1)* %global.ptr, i64 %y) #0 {
+  %cast = addrspacecast i64 addrspace(1)* %global.ptr to i64 addrspace(4)*
+  %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+  ret i64 %ret
+}
+
+; CHECK-LABEL: @atomicdec_group_to_flat_i64(
+; CHECK: call i64 @llvm.amdgcn.atomic.dec.i64.p3i64(i64 addrspace(3)* %group.ptr, i64 %y)
+define i64 @atomicdec_group_to_flat_i64(i64 addrspace(3)* %group.ptr, i64 %y) #0 {
+  %cast = addrspacecast i64 addrspace(3)* %group.ptr to i64 addrspace(4)*
+  %ret = call i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* %cast, i64 %y)
+  ret i64 %ret
+}
+
+declare i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)*, i1) #1
+declare i64 @llvm.objectsize.i64.p4i8(i8 addrspace(4)*, i1) #1
+declare i32 @llvm.amdgcn.atomic.inc.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
+declare i64 @llvm.amdgcn.atomic.inc.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
+declare i32 @llvm.amdgcn.atomic.dec.i32.p4i32(i32 addrspace(4)* nocapture, i32) #2
+declare i64 @llvm.amdgcn.atomic.dec.i64.p4i64(i64 addrspace(4)* nocapture, i64) #2
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
+attributes #2 = { nounwind argmemonly }
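
Illustration (not part of the patch): taking the first test case above, the input IR feeds a flat
(addrspace(4)) pointer into @llvm.objectsize through an addrspacecast:

  %cast = addrspacecast i8 addrspace(3)* %group.ptr to i8 addrspace(4)*
  %val = call i32 @llvm.objectsize.i32.p4i8(i8 addrspace(4)* %cast, i1 true)

The pass infers that the pointer actually lives in the group (addrspace(3)) space, and
rewriteIntrinsicOperands replaces the operand with the original pointer and re-mangles the call to
the address-space-specific overload, which is exactly what the CHECK line expects:

  %val = call i32 @llvm.objectsize.i32.p3i8(i8 addrspace(3)* %group.ptr, i1 true)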