diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -5041,18 +5041,31 @@ WeakTrackingVH SunkAddrVH = SunkAddrs[Addr]; Value * SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; + Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); if (SunkAddr) { LLVM_DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); - if (SunkAddr->getType() != Addr->getType()) - SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + if (SunkAddr->getType() != Addr->getType()) { + if (SunkAddr->getType()->getPointerAddressSpace() != + Addr->getType()->getPointerAddressSpace() && + !DL->isNonIntegralPointerType(Addr->getType())) { + // There are two reasons the address spaces might not match: a no-op + // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a + // ptrtoint/inttoptr pair to ensure we match the original semantics. + // TODO: allow bitcast between different address space pointers with the + // same size. + SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); + SunkAddr = + Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); + } else + SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + } } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && SubtargetInfo->addrSinkUsingGEPs())) { // By default, we use the GEP-based method when AA is used later. This // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities. LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for " << *MemoryInst << "\n"); - Type *IntPtrTy = DL->getIntPtrType(Addr->getType()); Value *ResultPtr = nullptr, *ResultIndex = nullptr; // First, find the pointer. 
@@ -5181,8 +5194,21 @@ : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr"); } - if (SunkAddr->getType() != Addr->getType()) - SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + if (SunkAddr->getType() != Addr->getType()) { + if (SunkAddr->getType()->getPointerAddressSpace() != + Addr->getType()->getPointerAddressSpace() && + !DL->isNonIntegralPointerType(Addr->getType())) { + // There are two reasons the address spaces might not match: a no-op + // addrspacecast, or a ptrtoint/inttoptr pair. Either way, we emit a + // ptrtoint/inttoptr pair to ensure we match the original semantics. + // TODO: allow bitcast between different address space pointers with + // the same size. + SunkAddr = Builder.CreatePtrToInt(SunkAddr, IntPtrTy, "sunkaddr"); + SunkAddr = + Builder.CreateIntToPtr(SunkAddr, Addr->getType(), "sunkaddr"); + } else + SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); + } } } else { // We'd require a ptrtoint/inttoptr down the line, which we can't do for diff --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/NVPTX/dont-introduce-addrspacecast.ll @@ -0,0 +1,43 @@ +; RUN: opt -S -codegenprepare < %s | FileCheck %s + +target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64" +target triple = "nvptx64-nvidia-cuda" + + +; ptrtoint/inttoptr combinations can introduce semantically-meaningful address space casts +; which we can't sink into an addrspacecast + +; CHECK-LABEL: @test +define void @test(i8* %input_ptr) { + ; CHECK-LABEL: l1: + ; CHECK-NOT: addrspacecast + %intptr = ptrtoint i8* %input_ptr to i64 + %ptr = inttoptr i64 %intptr to i32 addrspace(3)* + + br label %l1 +l1: + + store atomic i32 1, i32 addrspace(3)* %ptr unordered, align 4 + ret void +} + + +; we still should be able to look through multiple sequences of inttoptr/ptrtoint + 
+; CHECK-LABEL: @test2
+define void @test2(i8* %input_ptr) {
+  ; CHECK-LABEL: l2:
+  ; CHECK: bitcast
+  ; CHECK-NEXT: store
+  %intptr = ptrtoint i8* %input_ptr to i64 ; default-address-space pointer -> i64
+  %ptr = inttoptr i64 %intptr to i32 addrspace(3)* ; i64 -> pointer in addrspace(3)
+  ; Second round trip: addrspace(3) pointer -> i64 -> pointer in the default address space.
+  %intptr2 = ptrtoint i32 addrspace(3)* %ptr to i64
+  %ptr2 = inttoptr i64 %intptr2 to i32*
+  ; The store lives in a different block from the casts above (presumably so the address computation is non-local to the memory instruction; confirm against the pass).
+  br label %l2
+l2:
+  ; %ptr2 and %input_ptr are both in the default address space; the directives at the top of this function expect a bitcast on the line directly before the store.
+  store atomic i32 1, i32* %ptr2 unordered, align 4
+  ret void
+}