Index: llvm/docs/LangRef.rst
===================================================================
--- llvm/docs/LangRef.rst
+++ llvm/docs/LangRef.rst
@@ -10247,8 +10247,8 @@
 For most of these operations, the type of '<value>' must be an integer
 type whose bit width is a power of two greater than or equal to eight
 and less than or equal to a target-specific size limit. For xchg, this
-may also be a floating point type with the same size constraints as
-integers. For fadd/fsub, this must be a floating point type. The
+may also be a floating point or a pointer type with the same size constraints
+as integers. For fadd/fsub, this must be a floating point type. The
 type of the '``<pointer>``' operand must be a pointer to that type. If
 the ``atomicrmw`` is marked as ``volatile``, then the optimizer is not
 allowed to modify the number or order of execution of this
Index: llvm/lib/AsmParser/LLParser.cpp
===================================================================
--- llvm/lib/AsmParser/LLParser.cpp
+++ llvm/lib/AsmParser/LLParser.cpp
@@ -7434,10 +7434,12 @@
 
   if (Operation == AtomicRMWInst::Xchg) {
     if (!Val->getType()->isIntegerTy() &&
-        !Val->getType()->isFloatingPointTy()) {
-      return error(ValLoc,
-                   "atomicrmw " + AtomicRMWInst::getOperationName(Operation) +
-                   " operand must be an integer or floating point type");
+        !Val->getType()->isFloatingPointTy() &&
+        !Val->getType()->isPointerTy()) {
+      return error(
+          ValLoc,
+          "atomicrmw " + AtomicRMWInst::getOperationName(Operation) +
+              " operand must be an integer, floating point, or pointer type");
     }
   } else if (IsFP) {
     if (!Val->getType()->isFloatingPointTy()) {
@@ -7453,7 +7455,9 @@
     }
   }
 
-  unsigned Size = Val->getType()->getPrimitiveSizeInBits();
+  unsigned Size =
+      PFS.getFunction().getParent()->getDataLayout().getTypeStoreSizeInBits(
+          Val->getType());
   if (Size < 8 || (Size & (Size - 1)))
     return error(ValLoc, "atomicrmw operand must be power-of-two byte-sized"
                          " integer");
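A note on the parser change above: Type::getPrimitiveSizeInBits() returns 0 for pointer types, so the size check has to go through the module's DataLayout to learn a pointer's store size. A minimal IR sketch that the updated parser accepts (hypothetical function and value names; the explicit datalayout is only there to pin the pointer width):

  target datalayout = "e-p:32:32"

  define i8* @swap_head(i8** %head, i8* %node) {
    ; i8* stores as 32 bits under this datalayout: a power of two and at
    ; least one byte, so the power-of-two size check now succeeds for the
    ; pointer operand.
    %old = atomicrmw xchg i8** %head, i8* %node seq_cst
    ret i8* %old
  }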
Index: llvm/lib/CodeGen/AtomicExpandPass.cpp
===================================================================
--- llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -285,15 +285,6 @@
         MadeChange = true;
       } else {
         AtomicRMWInst::BinOp Op = RMWI->getOperation();
-        if (Op == AtomicRMWInst::Xchg &&
-            RMWI->getValOperand()->getType()->isFloatingPointTy()) {
-          // TODO: add a TLI hook to control this so that each target can
-          // convert to lowering the original type one at a time.
-          RMWI = convertAtomicXchgToIntegerType(RMWI);
-          assert(RMWI->getValOperand()->getType()->isIntegerTy() &&
-                 "invariant broken");
-          MadeChange = true;
-        }
         unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
         unsigned ValueSize = getAtomicOpSize(RMWI);
         if (ValueSize < MinCASSize &&
@@ -385,7 +376,9 @@
   Value *Val = RMWI->getValOperand();
   Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
   Value *NewAddr = Builder.CreateBitCast(Addr, PT);
-  Value *NewVal = Builder.CreateBitCast(Val, NewTy);
+  Value *NewVal = Val->getType()->isPointerTy()
+                      ? Builder.CreatePtrToInt(Val, NewTy)
+                      : Builder.CreateBitCast(Val, NewTy);
 
   auto *NewRMWI =
       Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
@@ -393,7 +386,9 @@
   NewRMWI->setVolatile(RMWI->isVolatile());
   LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
 
-  Value *NewRVal = Builder.CreateBitCast(NewRMWI, RMWI->getType());
+  Value *NewRVal = RMWI->getType()->isPointerTy()
+                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
+                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
   RMWI->replaceAllUsesWith(NewRVal);
   RMWI->eraseFromParent();
   return NewRMWI;
@@ -525,7 +520,7 @@
   Type *OrigTy = NewVal->getType();
 
   // This code can go away when cmpxchg supports FP types.
-  bool NeedBitcast = OrigTy->isFloatingPointTy();
+  bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isPointerTy();
   if (NeedBitcast) {
     IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
     unsigned AS = Addr->getType()->getPointerAddressSpace();
@@ -545,11 +540,20 @@
 }
 
 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+  bool MadeChange = false;
   LLVMContext &Ctx = AI->getModule()->getContext();
   TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
+  if (AI->getOperation() == AtomicRMWInst::Xchg &&
+      (AI->getValOperand()->getType()->isFloatingPointTy() ||
+       (Kind != TargetLoweringBase::AtomicExpansionKind::CmpXChg &&
+        AI->getValOperand()->getType()->isPointerTy()))) {
+    AI = convertAtomicXchgToIntegerType(AI);
+    assert(AI->getValOperand()->getType()->isIntegerTy() && "invariant broken");
+    MadeChange = true;
+  }
   switch (Kind) {
   case TargetLoweringBase::AtomicExpansionKind::None:
-    return false;
+    return MadeChange;
   case TargetLoweringBase::AtomicExpansionKind::LLSC: {
     unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
     unsigned ValueSize = getAtomicOpSize(AI);
@@ -572,7 +576,7 @@
     if (ValueSize < MinCASSize) {
       // TODO: Handle atomicrmw fadd/fsub
       if (AI->getType()->isFloatingPointTy())
-        return false;
+        return MadeChange;
 
       expandPartwordAtomicRMW(AI,
                               TargetLoweringBase::AtomicExpansionKind::CmpXChg);
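With the conversion hoisted into tryExpandAtomicRMW, any pointer-typed xchg that is not headed for the plain CmpXChg expansion is first rewritten by convertAtomicXchgToIntegerType, which uses ptrtoint/inttoptr where the floating-point path uses bitcasts. A sketch of the shape it produces, assuming 64-bit pointers (the value names are illustrative, not the pass's actual temporaries):

  define i8* @xchg_ptr_as_int(i8** %ptr, i8* %val) {
    ; The address is reinterpreted as a pointer to the matching integer type.
    %addr.cast = bitcast i8** %ptr to i64*
    ; The pointer operand is converted to an integer...
    %val.int = ptrtoint i8* %val to i64
    ; ...the swap itself happens as an ordinary integer xchg...
    %old.int = atomicrmw xchg i64* %addr.cast, i64 %val.int seq_cst
    ; ...and the old value is converted back for the original users.
    %old = inttoptr i64 %old.int to i8*
    ret i8* %old
  }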
Index: llvm/lib/IR/Verifier.cpp
===================================================================
--- llvm/lib/IR/Verifier.cpp
+++ llvm/lib/IR/Verifier.cpp
@@ -3925,7 +3925,8 @@
   auto Op = RMWI.getOperation();
   Type *ElTy = RMWI.getOperand(1)->getType();
   if (Op == AtomicRMWInst::Xchg) {
-    Check(ElTy->isIntegerTy() || ElTy->isFloatingPointTy(),
+    Check(ElTy->isIntegerTy() || ElTy->isFloatingPointTy() ||
+              ElTy->isPointerTy(),
           "atomicrmw " + AtomicRMWInst::getOperationName(Op) +
-              " operand must have integer or floating point type!",
+              " operand must have integer, floating point, or pointer type!",
           &RMWI, ElTy);
Index: llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-fp-or-pointer-type.ll
===================================================================
--- /dev/null
+++ llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-fp-or-pointer-type.ll
@@ -0,0 +1,7 @@
+; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
+
+; CHECK: error: atomicrmw xchg operand must be an integer, floating point, or pointer type
+define void @f(<3 x i1>* %ptr) {
+  atomicrmw xchg <3 x i1>* %ptr, <3 x i1> zeroinitializer seq_cst
+  ret void
+}
Index: llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll
===================================================================
--- llvm/test/Assembler/invalid-atomicrmw-xchg-must-be-integer-or-fp-type.ll
+++ /dev/null
@@ -1,7 +0,0 @@
-; RUN: not llvm-as -disable-output %s 2>&1 | FileCheck %s
-
-; CHECK: error: atomicrmw xchg operand must be an integer or floating point type
-define void @f(i32** %ptr) {
-  atomicrmw xchg i32** %ptr, i32* null seq_cst
-  ret void
-}
Index: llvm/test/Bitcode/compatibility.ll
===================================================================
--- llvm/test/Bitcode/compatibility.ll
+++ llvm/test/Bitcode/compatibility.ll
@@ -854,6 +854,12 @@
   ret void
 }
 
+define void @pointer_atomics(i8** %word) {
+; CHECK: %atomicrmw.xchg = atomicrmw xchg i8** %word, i8* null monotonic
+  %atomicrmw.xchg = atomicrmw xchg i8** %word, i8* null monotonic
+  ret void
+}
+
 ;; Fast Math Flags
 define void @fastmathflags_unop(float %op1) {
   %f.nnan = fneg nnan float %op1
Index: llvm/test/CodeGen/AArch64/atomicrmw-xchg-pointer.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/atomicrmw-xchg-pointer.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --force-update
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=NOLSE
+; RUN: llc -verify-machineinstrs -mtriple=aarch64-- -mattr=+lse -O1 -fast-isel=0 -global-isel=false %s -o - | FileCheck %s -check-prefix=LSE
+
+define i8* @test_rmw_xchg_pointer(i8** %dst, i8* %new) {
+; NOLSE-LABEL: test_rmw_xchg_pointer:
+; NOLSE:       // %bb.0:
+; NOLSE-NEXT:    mov x8, x0
+; NOLSE-NEXT:  .LBB0_1: // %atomicrmw.start
+; NOLSE-NEXT:    // =>This Inner Loop Header: Depth=1
+; NOLSE-NEXT:    ldaxr x0, [x8]
+; NOLSE-NEXT:    stlxr w9, x1, [x8]
+; NOLSE-NEXT:    cbnz w9, .LBB0_1
+; NOLSE-NEXT:  // %bb.2: // %atomicrmw.end
+; NOLSE-NEXT:    ret
+;
+; LSE-LABEL: test_rmw_xchg_pointer:
+; LSE:       // %bb.0:
+; LSE-NEXT:    swpal x1, x0, [x0]
+; LSE-NEXT:    ret
+  %res = atomicrmw xchg i8** %dst, i8* %new seq_cst
+  ret i8* %res
+}
Index: llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
+++ llvm/test/CodeGen/AMDGPU/flat_atomics_i64.ll
@@ -659,6 +659,15 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}atomic_xchg_pointer_offset:
+; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
+define amdgpu_kernel void @atomic_xchg_pointer_offset(i8** %out, i8* %in) {
+entry:
+  %gep = getelementptr i8*, i8** %out, i32 4
+  %val = atomicrmw volatile xchg i8** %gep, i8* %in seq_cst
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
 ; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
 ; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
Index: llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
+++ llvm/test/CodeGen/AMDGPU/global_atomics_i64.ll
@@ -794,6 +794,17 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}atomic_xchg_pointer_offset:
+; CIVI: buffer_atomic_swap_x2 v{{\[[0-9]+:[0-9]+\]}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32{{$}}
+
+; GFX9: global_atomic_swap_x2 v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s{{\[[0-9]+:[0-9]+\]}} offset:32{{$}}
+define amdgpu_kernel void @atomic_xchg_pointer_offset(i8* addrspace(1)* %out, i8* %in) {
+entry:
+  %gep = getelementptr i8*, i8* addrspace(1)* %out, i64 4
+  %tmp0 = atomicrmw volatile xchg i8* addrspace(1)* %gep, i8* %in seq_cst
+  ret void
+}
+
 ; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
 ; CIVI: buffer_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:32 glc{{$}}
 ; CIVI: buffer_store_dwordx2 [[RET]]
Index: llvm/test/CodeGen/AMDGPU/local-atomics64.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/local-atomics64.ll
+++ llvm/test/CodeGen/AMDGPU/local-atomics64.ll
@@ -40,6 +40,19 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}lds_atomic_xchg_ret_pointer_offset:
+; SICIVI: s_mov_b32 m0
+; GFX9-NOT: m0
+
+; GCN: ds_wrxchg_rtn_b64 {{.*}} offset:32
+; GCN: s_endpgm
+define amdgpu_kernel void @lds_atomic_xchg_ret_pointer_offset(i8* addrspace(1)* %out, i8* addrspace(3)* %ptr) nounwind {
+  %gep = getelementptr i8*, i8* addrspace(3)* %ptr, i32 4
+  %result = atomicrmw xchg i8* addrspace(3)* %gep, i8* null seq_cst
+  store i8* %result, i8* addrspace(1)* %out, align 8
+  ret void
+}
+
 ; GCN-LABEL: {{^}}lds_atomic_add_ret_i64:
 ; SICIVI: s_mov_b32 m0
 ; GFX9-NOT: m0
Index: llvm/test/CodeGen/X86/atomic64.ll
===================================================================
--- llvm/test/CodeGen/X86/atomic64.ll
+++ llvm/test/CodeGen/X86/atomic64.ll
@@ -4,6 +4,7 @@
 
 @sc64 = external dso_local global i64
 @fsc64 = external dso_local global double
+@psc64 = external dso_local global i8*
 
 define void @atomic_fetch_add64() nounwind {
 ; X64-LABEL: atomic_fetch_add64:
@@ -780,3 +781,18 @@
   %t1 = atomicrmw xchg double* @fsc64, double %x acquire
   ret void
 }
+
+define void @atomic_fetch_swapptr(i8* %x) nounwind {
+; X64-LABEL: atomic_fetch_swapptr:
+; X64:       # %bb.0:
+; X64-NEXT:    xchgq %rdi, psc64(%rip)
+; X64-NEXT:    retq
+;
+; I486-LABEL: atomic_fetch_swapptr:
+; I486:       # %bb.0:
+; I486-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; I486-NEXT:    xchgl %eax, psc64
+; I486-NEXT:    retl
+  %t1 = atomicrmw xchg i8** @psc64, i8* %x acquire
+  ret void
+}
Index: llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-pointer.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/AtomicExpand/AArch64/expand-atomicrmw-xchg-pointer.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -O1 -S -mtriple=aarch64-- -atomic-expand %s | FileCheck %s
+; RUN: opt -O1 -S -mtriple=aarch64-- -mattr=+outline-atomics -atomic-expand %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS
+
+define void @atomic_swap_pointer(i8** %ptr, i8* %val) nounwind {
+; CHECK-LABEL: @atomic_swap_pointer(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[PTR:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint i8* [[VAL:%.*]] to i64
+; CHECK-NEXT:    br label [[ATOMICRMW_START:%.*]]
+; CHECK:       atomicrmw.start:
+; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.aarch64.ldaxr.p0i64(i64* elementtype(i64) [[TMP1]])
+; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.aarch64.stxr.p0i64(i64 [[TMP2]], i64* elementtype(i64) [[TMP1]])
+; CHECK-NEXT:    [[TRYAGAIN:%.*]] = icmp ne i32 [[TMP4]], 0
+; CHECK-NEXT:    br i1 [[TRYAGAIN]], label [[ATOMICRMW_START]], label [[ATOMICRMW_END:%.*]]
+; CHECK:       atomicrmw.end:
+; CHECK-NEXT:    [[TMP5:%.*]] = inttoptr i64 [[TMP3]] to i8*
+; CHECK-NEXT:    ret void
+;
+; OUTLINE-ATOMICS-LABEL: @atomic_swap_pointer(
+; OUTLINE-ATOMICS-NEXT:    [[TMP1:%.*]] = bitcast i8** [[PTR:%.*]] to i64*
+; OUTLINE-ATOMICS-NEXT:    [[TMP2:%.*]] = ptrtoint i8* [[VAL:%.*]] to i64
+; OUTLINE-ATOMICS-NEXT:    [[TMP3:%.*]] = atomicrmw xchg i64* [[TMP1]], i64 [[TMP2]] acquire, align 8
+; OUTLINE-ATOMICS-NEXT:    [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to i8*
+; OUTLINE-ATOMICS-NEXT:    ret void
+;
+  %t1 = atomicrmw xchg i8** %ptr, i8* %val acquire
+  ret void
+}
Index: llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-pointer.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/AtomicExpand/X86/expand-atomic-xchg-pointer.ll
@@ -0,0 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -mtriple=i686-linux-gnu -atomic-expand %s | FileCheck %s
+
+define i8* @atomic_xchg_pointer(i8** %ptr) nounwind {
+; CHECK-LABEL: @atomic_xchg_pointer(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8** [[PTR:%.*]] to i64*
+; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw xchg i64* [[TMP1]], i64 0 seq_cst, align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i8*
+; CHECK-NEXT:    ret i8* [[TMP3]]
+;
+  %result = atomicrmw xchg i8** %ptr, i8* null seq_cst
+  ret i8* %result
+}
+
+define i8* @atomic_xchg_pointer_as1(i8* addrspace(1)* %ptr) nounwind {
+; CHECK-LABEL: @atomic_xchg_pointer_as1(
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8* addrspace(1)* [[PTR:%.*]] to i64 addrspace(1)*
+; CHECK-NEXT:    [[TMP2:%.*]] = atomicrmw xchg i64 addrspace(1)* [[TMP1]], i64 0 seq_cst, align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = inttoptr i64 [[TMP2]] to i8*
+; CHECK-NEXT:    ret i8* [[TMP3]]
+;
+  %result = atomicrmw xchg i8* addrspace(1)* %ptr, i8* null seq_cst
+  ret i8* %result
+}
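Taken together, a frontend that previously had to launder an atomic pointer swap through integers can now emit the swap directly; the integer round trip seen in the expansion tests above becomes an internal detail of AtomicExpand on targets that need it. A before/after sketch assuming 64-bit pointers (hypothetical function names):

  ; Old shape: swap by hand through an integer.
  define i8* @swap_via_int(i8** %p, i8* %v) {
    %p.int = bitcast i8** %p to i64*
    %v.int = ptrtoint i8* %v to i64
    %old.int = atomicrmw xchg i64* %p.int, i64 %v.int seq_cst
    %old = inttoptr i64 %old.int to i8*
    ret i8* %old
  }

  ; New shape: the swap keeps its pointer type.
  define i8* @swap_direct(i8** %p, i8* %v) {
    %old = atomicrmw xchg i8** %p, i8* %v seq_cst
    ret i8* %old
  }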