diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -53,7 +53,10 @@
     const bool IsOffset = ValuePair.second;
     for (auto &U : ValuePair.first->uses()) {
       auto *I = cast<Instruction>(U.getUser());
-
+      // FIXME(review): this skips the PHI without inspecting the PHI's own
+      // users; confirm they are validated elsewhere before relying on this.
+      if (isa<PHINode>(I))
+        continue;
       if (auto *LI = dyn_cast<LoadInst>(I)) {
         // Ignore non-volatile loads, they are always ok.
         if (!LI->isSimple()) return false;
@@ -256,6 +259,24 @@
       if (Load->isVolatile())
         return false;
       Worklist.insert(Load);
+    } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
+      // The incoming values must be rewritten before the PHI, so queue them
+      // ahead of it. Only instructions can be queued; bail out on any other
+      // kind of value instead of asserting inside cast<>.
+      // NOTE(review): an incoming instruction that is not itself derived from
+      // the replaced pointer has no replacement and would trip the assert in
+      // replace(); confirm callers rule that out.
+      for (Value *Incoming : PHI->incoming_values()) {
+        auto *IncomingInst = dyn_cast<Instruction>(Incoming);
+        if (!IncomingInst)
+          return false;
+        Worklist.insert(IncomingInst);
+      }
+      // Users of the PHI see the rewritten pointer as well, so they must be
+      // collected too. A SetVector-style insert() returns false when the PHI
+      // was already queued, which also keeps it from being walked twice.
+      if (Worklist.insert(PHI) && !collectUsers(*PHI))
+        return false;
     } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
       Worklist.insert(Inst);
       if (!collectUsers(*Inst))
@@ -293,6 +314,24 @@
     IC.InsertNewInstWith(NewI, *LT);
     IC.replaceInstUsesWith(*LT, NewI);
     WorkMap[LT] = NewI;
+  } else if (auto *PHI = dyn_cast<PHINode>(I)) {
+    // Fetch the rewritten incoming values first: their type, which may
+    // differ from the original PHI's (e.g. another address space), is
+    // the type the new PHI must have.
+    SmallVector<Value *, 4> ReplacedOperands;
+    for (Value *Incoming : PHI->incoming_values()) {
+      ReplacedOperands.push_back(getReplacement(Incoming));
+      assert(ReplacedOperands.back() && "Operand not replaced");
+    }
+    auto *NewPHI = PHINode::Create(ReplacedOperands.front()->getType(),
+                                   PHI->getNumIncomingValues());
+    for (unsigned Idx = 0, E = PHI->getNumIncomingValues(); Idx != E; ++Idx)
+      NewPHI->addIncoming(ReplacedOperands[Idx], PHI->getIncomingBlock(Idx));
+    NewPHI->takeName(PHI);
+    IC.InsertNewInstWith(NewPHI, *PHI);
+    // Deliberately no replaceInstUsesWith(): the types may differ, so
+    // users pick up the new PHI through WorkMap when they are rewritten.
+    WorkMap[PHI] = NewPHI;
   } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
     auto *V = getReplacement(GEP->getPointerOperand());
     assert(V && "Operand not replaced");
diff --git a/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll
@@ -0,0 +1,23 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -passes=instcombine -S -o - %s | FileCheck %s
+
+; NOTE(review): only the function label is matched below, so this test does
+; not yet verify that the alloca and memcpy are actually removed.
+; CHECK-LABEL: @remove_alloca
+define i8 @remove_alloca(i1 %flag, i32 %idx, [32 x i8] addrspace(4)* align 8 %arg) {
+entry:
+  %xp = alloca [32 x i8], align 8, addrspace(5)
+  call void @llvm.memcpy.p5.p4.i64([32 x i8] addrspace(5)* align 8 %xp, [32 x i8] addrspace(4)* align 8 %arg, i64 1360, i1 false)
+  br i1 %flag, label %true, label %false
+true:
+  %gep.true = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %xp, i32 0, i32 0
+  br label %sink
+false:
+  %gep.false = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %xp, i32 0, i32 0
+  br label %sink
+sink:
+  %x.2 = phi i8 addrspace(5)* [%gep.true, %true], [%gep.false, %false]
+  %x.3 = load i8, i8 addrspace(5)* %x.2, align 4
+  ret i8 %x.3
+}
+
+declare void @llvm.memcpy.p5.p4.i64([32 x i8] addrspace(5)* align 8, [32 x i8] addrspace(4)* align 8, i64, i1)