Index: lib/Transforms/Scalar/InferAddressSpaces.cpp
===================================================================
--- lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -235,13 +235,21 @@
   // We only explore address expressions that are reachable from loads and
   // stores for now because we aim at generating faster loads and stores.
   for (Instruction &I : instructions(F)) {
-    if (isa<LoadInst>(I)) {
+    if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
       appendsGenericAddressExpressionToPostorderStack(
-          I.getOperand(0), &PostorderStack, &Visited);
-    } else if (isa<StoreInst>(I)) {
+          LI->getPointerOperand(), &PostorderStack, &Visited);
+    } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
       appendsGenericAddressExpressionToPostorderStack(
-          I.getOperand(1), &PostorderStack, &Visited);
+          SI->getPointerOperand(), &PostorderStack, &Visited);
+    } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(&I)) {
+      appendsGenericAddressExpressionToPostorderStack(
+          RMW->getPointerOperand(), &PostorderStack, &Visited);
+    } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(&I)) {
+      appendsGenericAddressExpressionToPostorderStack(
+          CmpX->getPointerOperand(), &PostorderStack, &Visited);
     }
+
+    // TODO: Support intrinsics
   }
 
   std::vector<Value *> Postorder; // The resultant postorder.
@@ -526,6 +534,27 @@
   return NewAS;
 }
 
+static bool isCompatiblePtrUse(Use &U) {
+  User *Inst = U.getUser();
+  unsigned OpNo = U.getOperandNo();
+
+  if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
+    return OpNo == LoadInst::getPointerOperandIndex() && !LI->isVolatile();
+
+  if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
+    return OpNo == StoreInst::getPointerOperandIndex() && !SI->isVolatile();
+
+  if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst))
+    return OpNo == AtomicRMWInst::getPointerOperandIndex() && !RMW->isVolatile();
+
+  if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
+    return OpNo == AtomicCmpXchgInst::getPointerOperandIndex() &&
+           !CmpX->isVolatile();
+  }
+
+  return false;
+}
+
 bool InferAddressSpaces::rewriteWithNewAddressSpaces(
     const std::vector<Value *> &Postorder,
     const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {
@@ -569,15 +598,10 @@
                  << "\n  with\n  " << *NewV << '\n');
 
     for (Use *U : Uses) {
-      LoadInst *LI = dyn_cast<LoadInst>(U->getUser());
-      StoreInst *SI = dyn_cast<StoreInst>(U->getUser());
-
-      if ((LI && !LI->isVolatile()) ||
-          (SI && !SI->isVolatile() &&
-           U->getOperandNo() == StoreInst::getPointerOperandIndex())) {
-        // If V is used as the pointer operand of a load/store, sets the pointer
-        // operand to NewV. This replacement does not change the element type,
-        // so the resultant load/store is still valid.
+      if (isCompatiblePtrUse(*U)) {
+        // If V is used as the pointer operand of a compatible memory operation,
+        // sets the pointer operand to NewV. This replacement does not change
+        // the element type, so the resultant load/store is still valid.
         U->set(NewV);
       } else if (isa<Instruction>(U->getUser())) {
        // Otherwise, replaces the use with generic(NewV).
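In short, the pass now treats the pointer operand of non-volatile atomicrmw and cmpxchg the same way it already treats load/store pointer operands. A minimal before/after sketch of the rewrite, mirroring the AMDGPU tests below (the function name @example is purely illustrative, and the removal of the dead addrspacecast is what the CHECK-NEXT lines in the tests expect):

    ; Input: the atomic goes through a flat (addrspace(4)) pointer produced by an addrspacecast.
    define i32 @example(i32 addrspace(3)* %group.ptr, i32 %y) {
      %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
      %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst
      ret i32 %ret
    }

    ; After InferAddressSpaces: the atomicrmw operates directly on the group (addrspace(3))
    ; pointer, and the now-dead addrspacecast is gone.
    define i32 @example(i32 addrspace(3)* %group.ptr, i32 %y) {
      %ret = atomicrmw add i32 addrspace(3)* %group.ptr, i32 %y seq_cst
      ret i32 %ret
    }

Volatile atomics are deliberately left alone (isCompatiblePtrUse returns false for them), which the volatile.ll tests below verify.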
Index: test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
===================================================================
--- test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
+++ test/Transforms/InferAddressSpaces/AMDGPU/basic.ll
@@ -119,4 +119,46 @@
   ret void
 }
 
-attributes #0 = { nounwind }
\ No newline at end of file
+; CHECK-LABEL: @atomicrmw_add_global_to_flat(
+; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(1)* %global.ptr, i32 %y seq_cst
+define i32 @atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @atomicrmw_add_group_to_flat(
+; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(3)* %group.ptr, i32 %y seq_cst
+define i32 @atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = atomicrmw add i32 addrspace(4)* %cast, i32 %y seq_cst
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @cmpxchg_global_to_flat(
+; CHECK: %ret = cmpxchg i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val seq_cst monotonic
+define { i32, i1 } @cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+  ret { i32, i1 } %ret
+}
+
+; CHECK-LABEL: @cmpxchg_group_to_flat(
+; CHECK: %ret = cmpxchg i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val seq_cst monotonic
+define { i32, i1 } @cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = cmpxchg i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+  ret { i32, i1 } %ret
+}
+
+; Not pointer operand
+; CHECK-LABEL: @cmpxchg_group_to_flat_wrong_operand(
+; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)*
+; CHECK: %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic
+define { i32 addrspace(4)*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32 addrspace(4)* %val) #0 {
+  %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32 addrspace(4)*
+  %ret = cmpxchg i32 addrspace(4)* addrspace(3)* %cas.ptr, i32 addrspace(4)* %cast.cmp, i32 addrspace(4)* %val seq_cst monotonic
+  ret { i32 addrspace(4)*, i1 } %ret
+}
+
+attributes #0 = { nounwind }
Index: test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
===================================================================
--- test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
+++ test/Transforms/InferAddressSpaces/AMDGPU/volatile.ll
@@ -79,4 +79,40 @@
   ret void
 }
 
+; CHECK-LABEL: @volatile_atomicrmw_add_group_to_flat(
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+; CHECK: atomicrmw volatile add i32 addrspace(4)*
+define i32 @volatile_atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @volatile_atomicrmw_add_global_to_flat(
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+; CHECK: %ret = atomicrmw volatile add i32 addrspace(4)*
+define i32 @volatile_atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = atomicrmw volatile add i32 addrspace(4)* %cast, i32 %y seq_cst
+  ret i32 %ret
+}
+
+; CHECK-LABEL: @volatile_cmpxchg_global_to_flat(
+; CHECK: addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+; CHECK: cmpxchg volatile i32 addrspace(4)*
+define { i32, i1 } @volatile_cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32 addrspace(4)*
+  %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+  ret { i32, i1 } %ret
+}
+
+; CHECK-LABEL: @volatile_cmpxchg_group_to_flat(
+; CHECK: addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+; CHECK: cmpxchg volatile i32 addrspace(4)*
+define { i32, i1 } @volatile_cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
+  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32 addrspace(4)*
+  %ret = cmpxchg volatile i32 addrspace(4)* %cast, i32 %cmp, i32 %val seq_cst monotonic
+  ret { i32, i1 } %ret
+}
+
 attributes #0 = { nounwind }
\ No newline at end of file