Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h @@ -322,6 +322,11 @@ bool replacedSelectWithOperand(SelectInst *SI, const ICmpInst *Icmp, const unsigned SIOpd); + /// Try to replace instruction \p I with value \p V which are pointers + /// in different address space. + /// \return true if successful. + bool replacePointer(Instruction &I, Value *V); + private: bool shouldChangeType(unsigned FromBitWidth, unsigned ToBitWidth) const; bool shouldChangeType(Type *From, Type *To) const; Index: llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp =================================================================== --- llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ llvm/trunk/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -12,14 +12,15 @@ //===----------------------------------------------------------------------===// #include "InstCombineInternal.h" +#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Loads.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/LLVMContext.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/MDBuilder.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" @@ -224,6 +225,103 @@ return nullptr; } +// If I and V are pointers in different address space, it is not allowed to +// use replaceAllUsesWith since I and V have different types. A +// non-target-specific transformation should not use addrspacecast on V since +// the two address space may be disjoint depending on target. +// +// This class chases down uses of the old pointer until reaching the load +// instructions, then replaces the old pointer in the load instructions with +// the new pointer. If during the chasing it sees bitcast or GEP, it will +// create new bitcast or GEP with the new pointer and use them in the load +// instruction. +class PointerReplacer { +public: + PointerReplacer(InstCombiner &IC) : IC(IC) {} + void replacePointer(Instruction &I, Value *V); + +private: + void findLoadAndReplace(Instruction &I); + void replace(Instruction *I); + Value *getReplacement(Value *I); + + SmallVector Path; + MapVector WorkMap; + InstCombiner &IC; +}; + +void PointerReplacer::findLoadAndReplace(Instruction &I) { + for (auto U : I.users()) { + auto *Inst = dyn_cast(&*U); + if (!Inst) + return; + DEBUG(dbgs() << "Found pointer user: " << *U << '\n'); + if (isa(Inst)) { + for (auto P : Path) + replace(P); + replace(Inst); + } else if (isa(Inst) || isa(Inst)) { + Path.push_back(Inst); + findLoadAndReplace(*Inst); + Path.pop_back(); + } else { + return; + } + } +} + +Value *PointerReplacer::getReplacement(Value *V) { + auto Loc = WorkMap.find(V); + if (Loc != WorkMap.end()) + return Loc->second; + return nullptr; +} + +void PointerReplacer::replace(Instruction *I) { + if (getReplacement(I)) + return; + + if (auto *LT = dyn_cast(I)) { + auto *V = getReplacement(LT->getPointerOperand()); + assert(V && "Operand not replaced"); + auto *NewI = new LoadInst(V); + NewI->takeName(LT); + IC.InsertNewInstWith(NewI, *LT); + IC.replaceInstUsesWith(*LT, NewI); + WorkMap[LT] = NewI; + } else if (auto *GEP = dyn_cast(I)) { + auto *V = getReplacement(GEP->getPointerOperand()); + assert(V && "Operand not replaced"); + SmallVector Indices; + Indices.append(GEP->idx_begin(), GEP->idx_end()); + auto *NewI = GetElementPtrInst::Create( + V->getType()->getPointerElementType(), V, Indices); + IC.InsertNewInstWith(NewI, *GEP); + NewI->takeName(GEP); + WorkMap[GEP] = NewI; + } else if (auto *BC = dyn_cast(I)) { + auto *V = getReplacement(BC->getOperand(0)); + assert(V && "Operand not replaced"); + auto *NewT = PointerType::get(BC->getType()->getPointerElementType(), + V->getType()->getPointerAddressSpace()); + auto *NewI = new BitCastInst(V, NewT); + IC.InsertNewInstWith(NewI, *BC); + NewI->takeName(BC); + WorkMap[GEP] = NewI; + } else { + llvm_unreachable("should never reach here"); + } +} + +void PointerReplacer::replacePointer(Instruction &I, Value *V) { + auto *PT = cast(I.getType()); + auto *NT = cast(V->getType()); + assert(PT != NT && PT->getElementType() == NT->getElementType() && + "Invalid usage"); + WorkMap[&I] = V; + findLoadAndReplace(I); +} + Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { if (auto *I = simplifyAllocaArraySize(*this, AI)) return I; @@ -294,12 +392,22 @@ for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) eraseInstFromFunction(*ToDelete[i]); Constant *TheSrc = cast(Copy->getSource()); - Constant *Cast - = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType()); - Instruction *NewI = replaceInstUsesWith(AI, Cast); - eraseInstFromFunction(*Copy); - ++NumGlobalCopies; - return NewI; + auto *SrcTy = TheSrc->getType(); + auto *DestTy = PointerType::get(AI.getType()->getPointerElementType(), + SrcTy->getPointerAddressSpace()); + Constant *Cast = + ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, DestTy); + if (AI.getType()->getPointerAddressSpace() == + SrcTy->getPointerAddressSpace()) { + Instruction *NewI = replaceInstUsesWith(AI, Cast); + eraseInstFromFunction(*Copy); + ++NumGlobalCopies; + return NewI; + } else { + PointerReplacer PtrReplacer(*this); + PtrReplacer.replacePointer(AI, Cast); + ++NumGlobalCopies; + } } } } Index: llvm/trunk/test/Transforms/InstCombine/memcpy-addrspace.ll =================================================================== --- llvm/trunk/test/Transforms/InstCombine/memcpy-addrspace.ll +++ llvm/trunk/test/Transforms/InstCombine/memcpy-addrspace.ll @@ -0,0 +1,65 @@ +; RUN: opt < %s -instcombine -S | FileCheck %s + +@test.data = private unnamed_addr addrspace(2) constant [8 x i32] [i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7], align 4 + +; CHECK-LABEL: test_load +; CHECK: %[[GEP:.*]] = getelementptr [8 x i32], [8 x i32] addrspace(2)* @test.data, i64 0, i64 %x +; CHECK: %{{.*}} = load i32, i32 addrspace(2)* %[[GEP]] +; CHECK-NOT: alloca +; CHECK-NOT: call void @llvm.memcpy.p0i8.p2i8.i64 +; CHECK-NOT: addrspacecast +; CHECK-NOT: load i32, i32* +define void @test_load(i32 addrspace(1)* %out, i64 %x) { +entry: + %data = alloca [8 x i32], align 4 + %0 = bitcast [8 x i32]* %data to i8* + call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false) + %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x + %1 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x + store i32 %1, i32 addrspace(1)* %arrayidx1, align 4 + ret void +} + +; CHECK-LABEL: test_call +; CHECK: alloca +; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64 +; CHECK-NOT: addrspacecast +; CHECK: call i32 @foo(i32* %{{.*}}) +define void @test_call(i32 addrspace(1)* %out, i64 %x) { +entry: + %data = alloca [8 x i32], align 4 + %0 = bitcast [8 x i32]* %data to i8* + call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false) + %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x + %1 = call i32 @foo(i32* %arrayidx) + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x + store i32 %1, i32 addrspace(1)* %arrayidx1, align 4 + ret void +} + +; CHECK-LABEL: test_load_and_call +; CHECK: alloca +; CHECK: call void @llvm.memcpy.p0i8.p2i8.i64 +; CHECK: load i32, i32* %{{.*}} +; CHECK: call i32 @foo(i32* %{{.*}}) +; CHECK-NOT: addrspacecast +; CHECK-NOT: load i32, i32 addrspace(2)* +define void @test_load_and_call(i32 addrspace(1)* %out, i64 %x, i64 %y) { +entry: + %data = alloca [8 x i32], align 4 + %0 = bitcast [8 x i32]* %data to i8* + call void @llvm.memcpy.p0i8.p2i8.i64(i8* %0, i8 addrspace(2)* bitcast ([8 x i32] addrspace(2)* @test.data to i8 addrspace(2)*), i64 32, i32 4, i1 false) + %arrayidx = getelementptr inbounds [8 x i32], [8 x i32]* %data, i64 0, i64 %x + %1 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %x + store i32 %1, i32 addrspace(1)* %arrayidx1, align 4 + %2 = call i32 @foo(i32* %arrayidx) + %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %y + store i32 %2, i32 addrspace(1)* %arrayidx2, align 4 + ret void +} + + +declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture writeonly, i8 addrspace(2)* nocapture readonly, i64, i32, i1) +declare i32 @foo(i32* %x)