diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -12,6 +12,7 @@ #include "InstCombineInternal.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -53,14 +54,13 @@ const bool IsOffset = ValuePair.second; for (auto &U : ValuePair.first->uses()) { auto *I = cast<Instruction>(U.getUser()); - if (auto *LI = dyn_cast<LoadInst>(I)) { // Ignore non-volatile loads, they are always ok. if (!LI->isSimple()) return false; continue; } - if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) { + if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) || isa<PHINode>(I)) { // If uses of the bitcast are ok, we are ok. ValuesToInspect.emplace_back(I, IsOffset); continue; } @@ -256,6 +256,12 @@ if (Load->isVolatile()) return false; Worklist.insert(Load); + } else if (auto *PHI = dyn_cast<PHINode>(Inst)) { + for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I) + Worklist.insert(cast<Instruction>(PHI->getIncomingValue(I))); + Worklist.insert(PHI); + if (!collectUsers(*PHI)) + return false; } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) { Worklist.insert(Inst); if (!collectUsers(*Inst)) @@ -293,6 +299,15 @@ IC.InsertNewInstWith(NewI, *LT); IC.replaceInstUsesWith(*LT, NewI); WorkMap[LT] = NewI; + } else if (auto *PHI = dyn_cast<PHINode>(I)) { + auto *NewTy = getReplacement(PHI->getIncomingValue(0))->getType(); + auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues()); + for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I) + NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(I)), + PHI->getIncomingBlock(I)); + IC.InsertNewInstWith(NewPHI, *PHI); + NewPHI->takeName(PHI); + WorkMap[PHI] = NewPHI; } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { auto *V = getReplacement(GEP->getPointerOperand()); assert(V && "Operand not replaced"); @@ -430,7 +445,6 @@ if 
(PtrReplacer.collectUsers(AI)) { for (Instruction *Delete : ToDelete) eraseInstFromFunction(*Delete); - Value *Cast = Builder.CreateBitCast(TheSrc, DestTy); PtrReplacer.replacePointer(AI, Cast); ++NumGlobalCopies; diff --git a/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll @@ -0,0 +1,110 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s + +target triple="amdgcn-amd-amdhsa" + +define i8 @remove_alloca_use_arg([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i1 %cond) { +; CHECK-LABEL: @remove_alloca_use_arg( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[VAL_IF:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG:%.*]], i64 0, i64 1 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: else: +; CHECK-NEXT: [[VAL_ELSE:%.*]] = getelementptr [32 x i8], [32 x i8] addrspace(4)* [[ARG]], i64 0, i64 0 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[PTR:%.*]] = phi i8 addrspace(4)* [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ] +; CHECK-NEXT: [[LOAD:%.*]] = load i8, i8 addrspace(4)* [[PTR]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(5) + call void @llvm.memcpy.p5i8.p4i8.i64([32 x i8] addrspace(5)* %alloca, [32 x i8] addrspace(4)* %arg, i64 256, i1 false) + br i1 %cond, label %if, label %else + +if: + %val.if = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 1 + br label %sink + +else: + %val.else = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 0 + br label %sink + +sink: + %ptr = phi i8 addrspace(5)* [ %val.if, %if ], [ %val.else, %else ] + %load = load i8, i8 addrspace(5)* %ptr + ret i8 
%load +} + +define i8 @volatile_load_keep_alloca([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i1 %cond) { +; CHECK-LABEL: @volatile_load_keep_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5) +; CHECK-NEXT: call void @llvm.memcpy.p5a32i8.p4a32i8.i64([32 x i8] addrspace(5)* noundef nonnull align 4 dereferenceable(256) [[ALLOCA]], [32 x i8] addrspace(4)* noundef align 4 dereferenceable(256) [[ARG:%.*]], i64 256, i1 false) +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i64 0, i64 1 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: else: +; CHECK-NEXT: [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i64 0, i64 0 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[PTR:%.*]] = phi i8 addrspace(5)* [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ] +; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, i8 addrspace(5)* [[PTR]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(5) + call void @llvm.memcpy.p5i8.p4i8.i64([32 x i8] addrspace(5)* %alloca, [32 x i8] addrspace(4)* %arg, i64 256, i1 false) + br i1 %cond, label %if, label %else + +if: + %val.if = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 1 + br label %sink + +else: + %val.else = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 0 + br label %sink + +sink: + %ptr = phi i8 addrspace(5)* [ %val.if, %if ], [ %val.else, %else ] + %load = load volatile i8, i8 addrspace(5)* %ptr + ret i8 %load +} + +define i8 @no_memcpy_keep_alloca([32 x i8] addrspace(4)* noalias readonly align 4 dereferenceable(32) %arg, i1 %cond) { +; CHECK-LABEL: @no_memcpy_keep_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, 
addrspace(5) +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i64 0, i64 1 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: else: +; CHECK-NEXT: [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* [[ALLOCA]], i64 0, i64 0 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[PTR:%.*]] = phi i8 addrspace(5)* [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ] +; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, i8 addrspace(5)* [[PTR]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(5) + br i1 %cond, label %if, label %else + +if: + %val.if = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 1 + br label %sink + +else: + %val.else = getelementptr inbounds [32 x i8], [32 x i8] addrspace(5)* %alloca, i32 0, i32 0 + br label %sink + +sink: + %ptr = phi i8 addrspace(5)* [ %val.if, %if ], [ %val.else, %else ] + %load = load volatile i8, i8 addrspace(5)* %ptr + ret i8 %load +} + +declare void @llvm.memcpy.p5i8.p4i8.i64([32 x i8] addrspace(5)*, [32 x i8] addrspace(4)*, i64, i1)