Index: lib/Transforms/Scalar/InferAddressSpaces.cpp
===================================================================
--- lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -232,16 +232,25 @@
   std::vector<std::pair<Value *, bool>> PostorderStack;
   // The set of visited expressions.
   DenseSet<Value *> Visited;
+
+  auto pushPtrOperand = [&](Value *Ptr) {
+    appendsGenericAddressExpressionToPostorderStack(
+      Ptr, &PostorderStack, &Visited);
+  };
+
   // We only explore address expressions that are reachable from loads and
   // stores for now because we aim at generating faster loads and stores.
   for (Instruction &I : instructions(F)) {
-    if (isa<LoadInst>(I)) {
-      appendsGenericAddressExpressionToPostorderStack(
-        I.getOperand(0), &PostorderStack, &Visited);
-    } else if (isa<StoreInst>(I)) {
-      appendsGenericAddressExpressionToPostorderStack(
-        I.getOperand(1), &PostorderStack, &Visited);
-    }
+    if (auto *LI = dyn_cast<LoadInst>(&I))
+      pushPtrOperand(LI->getPointerOperand());
+    else if (auto *SI = dyn_cast<StoreInst>(&I))
+      pushPtrOperand(SI->getPointerOperand());
+    else if (auto *RMW = dyn_cast<AtomicRMWInst>(&I))
+      pushPtrOperand(RMW->getPointerOperand());
+    else if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(&I))
+      pushPtrOperand(CmpX->getPointerOperand());
+
+    // TODO: Support intrinsics
   }
 
   std::vector<Value *> Postorder; // The resultant postorder.
@@ -526,6 +535,30 @@
   return NewAS;
 }
 
+/// \returns true if \p U is the pointer operand of a memory instruction with
+/// a single pointer operand that can have its address space changed by simply
+/// mutating the use to a new value.
+static bool isSimplePointerUseValidToReplace(Use &U) {
+  User *Inst = U.getUser();
+  unsigned OpNo = U.getOperandNo();
+
+  if (auto *LI = dyn_cast<LoadInst>(Inst))
+    return OpNo == LoadInst::getPointerOperandIndex() && !LI->isVolatile();
+
+  if (auto *SI = dyn_cast<StoreInst>(Inst))
+    return OpNo == StoreInst::getPointerOperandIndex() && !SI->isVolatile();
+
+  if (auto *RMW = dyn_cast<AtomicRMWInst>(Inst))
+    return OpNo == AtomicRMWInst::getPointerOperandIndex() && !RMW->isVolatile();
+
+  if (auto *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+    return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
+           !CmpX->isVolatile();
+  }
+
+  return false;
+}
+
 bool InferAddressSpaces::rewriteWithNewAddressSpaces(
     const std::vector<Value *> &Postorder,
     const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
@@ -569,15 +602,10 @@
                  << "\n with\n " << *NewV << '\n');
 
     for (Use *U : Uses) {
-      LoadInst *LI = dyn_cast<LoadInst>(U->getUser());
-      StoreInst *SI = dyn_cast<StoreInst>(U->getUser());
-
-      if ((LI && !LI->isVolatile()) ||
-          (SI && !SI->isVolatile() &&
-           U->getOperandNo() == StoreInst::getPointerOperandIndex())) {
-        // If V is used as the pointer operand of a load/store, sets the pointer
-        // operand to NewV. This replacement does not change the element type,
-        // so the resultant load/store is still valid.
+      if (isSimplePointerUseValidToReplace(*U)) {
+        // If V is used as the pointer operand of a compatible memory operation,
+        // sets the pointer operand to NewV. This replacement does not change
+        // the element type, so the resultant load/store is still valid.
         U->set(NewV);
       } else if (isa<Instruction>(U->getUser())) {
         // Otherwise, replaces the use with generic(NewV).
Index: test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
===================================================================
--- test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
+++ test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
@@ -128,4 +128,46 @@
   ret void
 }
 
+; CHECK-LABEL: @atomicrmw_add_global_to_flat(
+; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(1)* %global.ptr, i32 %y seq_cst
+define i32 @atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicrmw_add_group_to_flat(
+; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(3)* %group.ptr, i32 %y seq_cst
+define i32 @atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @cmpxchg_global_to_flat(
+; CHECK: %ret = cmpxchg i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val seq_cst monotonic
+define { i32, i1 } @cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+  ret { i32, i1 } %ret
+}
+
+; CHECK-LABEL: @cmpxchg_group_to_flat(
+; CHECK: %ret = cmpxchg i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val seq_cst monotonic
+define { i32, i1 } @cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+  ret { i32, i1 } %ret
+}
+
+; Not pointer operand
+; CHECK-LABEL: @cmpxchg_group_to_flat_wrong_operand(
+; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)*
+; CHECK: %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic
+define { i32 addrspace(4)*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32 addrspace(4)* %val) #0 {
+  %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)*
+  %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic
+  ret { i32 addrspace(4)*, i1 } %ret
+}
+
 attributes #0 = { nounwind }
Index: test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
===================================================================
--- test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
+++ test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
@@ -79,4 +79,40 @@
   ret void
 }
 
+; CHECK-LABEL: @volatile_atomicrmw_add_group_to_flat(
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+; CHECK: atomicrmw volatile add i32 addrspace(4)*
+define i32 @volatile_atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @volatile_atomicrmw_add_global_to_flat(
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+; CHECK: %ret = atomicrmw volatile add i32 addrspace(4)*
+define i32 @volatile_atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @volatile_cmpxchg_global_to_flat(
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+; CHECK: cmpxchg volatile i32 addrspace(4)*
+define { i32, i1 } @volatile_cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+  ret { i32, i1 } %ret
+}
+
+; CHECK-LABEL: @volatile_cmpxchg_group_to_flat(
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+; CHECK: cmpxchg volatile i32 addrspace(4)*
+define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+  ret { i32, i1 } %ret
+}
+
 attributes #0 = { nounwind }
\ No newline at end of file