diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -13,11 +13,13 @@
 #include "InstCombineInternal.h"
 #include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/PatternMatch.h"
@@ -53,14 +55,13 @@
     const bool IsOffset = ValuePair.second;
     for (auto &U : ValuePair.first->uses()) {
       auto *I = cast<Instruction>(U.getUser());
-
       if (auto *LI = dyn_cast<LoadInst>(I)) {
         // Ignore non-volatile loads, they are always ok.
         if (!LI->isSimple()) return false;
         continue;
       }
-      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
+      if (isa<BitCastInst, AddrSpaceCastInst, PHINode>(I)) {
         // If uses of the bitcast are ok, we are ok.
         ValuesToInspect.emplace_back(I, IsOffset);
         continue;
       }
@@ -256,7 +257,15 @@
       if (Load->isVolatile())
         return false;
       Worklist.insert(Load);
-    } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
+    } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
+      // Insert incoming values to the worklist only if they are instructions.
+      for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx)
+        if (auto *Inst = dyn_cast<Instruction>(PHI->getIncomingValue(Idx)))
+          Worklist.insert(Inst);
+      Worklist.insert(PHI);
+      if (!collectUsers(*PHI))
+        return false;
+    } else if (isa<GetElementPtrInst, BitCastInst>(Inst)) {
       Worklist.insert(Inst);
       if (!collectUsers(*Inst))
         return false;
@@ -293,6 +302,50 @@
     IC.InsertNewInstWith(NewI, *LT);
     IC.replaceInstUsesWith(*LT, NewI);
     WorkMap[LT] = NewI;
+  } else if (auto *PHI = dyn_cast<PHINode>(I)) {
+
+    // Type of new PHI is the type of (replaced) incoming value
+    // of the old PHI.
+    auto *V = PHI->getIncomingValue(0);
+    Type *NewTy = V->getType();
+    if (Worklist.contains(dyn_cast<Instruction>(V)))
+      NewTy = getReplacement(V)->getType();
+
+    // Create a new PHI instruction. Replace the incoming values
+    // if possible. If the value is not a result of an instruction,
+    // cast it into the type of the PHI and use that value as
+    // an operand of the new PHI.
+    auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(), "", PHI);
+    bool PtrCastInserted = false;
+    for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I) {
+      V = PHI->getIncomingValue(I);
+      auto *Inst = dyn_cast<Instruction>(V);
+      auto *Block = PHI->getIncomingBlock(I);
+      if (Inst) {
+        V = getReplacement(V);
+        assert(V && "Incoming value not replaced");
+      }
+      if (V->getType()->getPointerAddressSpace() !=
+          NewTy->getPointerAddressSpace()) {
+        V = AddrSpaceCastInst::CreatePointerBitCastOrAddrSpaceCast(
+            V, NewTy, V->getName(), NewPHI);
+        PtrCastInserted = true;
+      }
+      NewPHI->addIncoming(V, Block);
+    }
+
+    if (PtrCastInserted) {
+      auto *OrigTypeCast =
+          AddrSpaceCastInst::CreatePointerBitCastOrAddrSpaceCast(
+              NewPHI, PHI->getType());
+      IC.InsertNewInstWith(OrigTypeCast, *PHI);
+      OrigTypeCast->takeName(PHI);
+      WorkMap[PHI] = OrigTypeCast;
+    } else {
+      NewPHI->takeName(PHI);
+      WorkMap[PHI] = NewPHI;
+    }
+
   } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
     auto *V = getReplacement(GEP->getPointerOperand());
     assert(V && "Operand not replaced");
@@ -430,7 +483,6 @@
   if (PtrReplacer.collectUsers(AI)) {
     for (Instruction *Delete : ToDelete)
       eraseInstFromFunction(*Delete);
-
     Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
     PtrReplacer.replacePointer(AI, Cast);
     ++NumGlobalCopies;
diff --git a/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll
@@ -0,0 +1,202 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s
+
+target triple="amdgcn-amd-amdhsa"
+
+define i8 @remove_alloca_use_arg(ptr addrspace(4) noalias readonly align 4 dereferenceable(32) %arg, i1 %cond) {
+; CHECK-LABEL: @remove_alloca_use_arg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[VAL_IF:%.*]] = getelementptr [32 x i8], ptr addrspace(4) [[ARG:%.*]], i64 0, i64 2
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[VAL_ELSE:%.*]] = getelementptr [32 x i8], ptr addrspace(4) [[ARG]], i64 0, i64 1
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(4) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr addrspace(4) [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(5)
+  call void @llvm.memcpy.p5i8.p4i8.i64(ptr addrspace(5) %alloca, ptr addrspace(4) %arg, i64 256, i1 false)
+  br i1 %cond, label %if, label %else
+
+if:
+  %val.if = getelementptr inbounds [32 x i8], ptr addrspace(5) %alloca, i32 0, i32 2
+  br label %sink
+
+else:
+  %val.else = getelementptr inbounds [32 x i8], ptr addrspace(5) %alloca, i32 0, i32 1
+  br label %sink
+
+sink:
+  %ptr = phi ptr addrspace(5) [ %val.if, %if ], [ %val.else, %else ]
+  %load = load i8, ptr addrspace(5) %ptr
+  ret i8 %load
+}
+
+define i8 @volatile_load_keep_alloca(ptr addrspace(4) noalias readonly align 4 dereferenceable(32) %arg, i1 %cond) {
+; CHECK-LABEL: @volatile_load_keep_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
+; CHECK-NEXT:    call void @llvm.memcpy.p5.p4.i64(ptr addrspace(5) noundef nonnull align 4 dereferenceable(256) [[ALLOCA]], ptr addrspace(4) noundef align 4 dereferenceable(256) [[ARG:%.*]], i64 256, i1 false)
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(5) [[ALLOCA]], i64 0, i64 1
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(5) [[ALLOCA]], i64 0, i64 2
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(5) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, ptr addrspace(5) [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(5)
+  call void @llvm.memcpy.p5i8.p4i8.i64(ptr addrspace(5) %alloca, ptr addrspace(4) %arg, i64 256, i1 false)
+  br i1 %cond, label %if, label %else
+
+if:
+  %val.if = getelementptr inbounds [32 x i8], ptr addrspace(5) %alloca, i32 0, i32 1
+  br label %sink
+
+else:
+  %val.else = getelementptr inbounds [32 x i8], ptr addrspace(5) %alloca, i32 0, i32 2
+  br label %sink
+
+sink:
+  %ptr = phi ptr addrspace(5) [ %val.if, %if ], [ %val.else, %else ]
+  %load = load volatile i8, ptr addrspace(5) %ptr
+  ret i8 %load
+}
+
+
+define i8 @no_memcpy_keep_alloca(ptr addrspace(4) noalias readonly align 4 dereferenceable(32) %arg, i1 %cond) {
+; CHECK-LABEL: @no_memcpy_keep_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(5)
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(5) [[ALLOCA]], i64 0, i64 1
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(5) [[ALLOCA]], i64 0, i64 2
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(5) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, ptr addrspace(5) [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(5)
+  br i1 %cond, label %if, label %else
+
+if:
+  %val.if = getelementptr inbounds [32 x i8], ptr addrspace(5) %alloca, i32 0, i32 1
+  br label %sink
+
+else:
+  %val.else = getelementptr inbounds [32 x i8], ptr addrspace(5) %alloca, i32 0, i32 2
+  br label %sink
+
+sink:
+  %ptr = phi ptr addrspace(5) [ %val.if, %if ], [ %val.else, %else ]
+  %load = load volatile i8, ptr addrspace(5) %ptr
+  ret i8 %load
+}
+
+
+define i8 @loop_phi_remove_alloca(ptr addrspace(4) noalias readonly align 4 dereferenceable(32) %arg, i1 %cond) {
+; CHECK-LABEL: @loop_phi_remove_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL1:%.*]] = getelementptr [32 x i8], ptr addrspace(4) [[ARG:%.*]], i64 0, i64 1
+; CHECK-NEXT:    br label [[BB_0:%.*]]
+; CHECK:       bb.0:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(4) [ [[VAL1]], [[ENTRY:%.*]] ], [ [[VAL2:%.*]], [[BB_1:%.*]] ]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB_1]], label [[EXIT:%.*]]
+; CHECK:       bb.1:
+; CHECK-NEXT:    [[VAL2]] = getelementptr [32 x i8], ptr addrspace(4) [[ARG]], i64 0, i64 2
+; CHECK-NEXT:    br label [[BB_0]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr addrspace(4) [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(5)
+  call void @llvm.memcpy.p5i8.p4i8.i64(ptr addrspace(5) %alloca, ptr addrspace(4) %arg, i64 256, i1 false)
+  %val1 = getelementptr inbounds [32 x i8], ptr addrspace(5) %alloca, i32 0, i32 1
+  br label %bb.0
+
+bb.0:
+  %ptr = phi ptr addrspace(5) [ %val1, %entry ], [ %val2, %bb.1 ]
+  br i1 %cond, label %bb.1, label %exit
+
+bb.1:
+  %val2 = getelementptr inbounds [32 x i8], ptr addrspace(5) %alloca, i32 0, i32 2
+  br label %bb.0
+
+exit:
+  %load = load i8, ptr addrspace(5) %ptr
+  ret i8 %load
+}
+
+@g1 = constant [32 x i8] zeroinitializer
+
+define i32 @test1(i1 %c, ptr %ptr) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ @g1, [[IF]] ], [ [[PTR:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[PHI]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a = alloca [32 x i8]
+  call void @llvm.memcpy.p0.p0.i64(ptr %a, ptr @g1, i64 32, i1 false)
+  br i1 %c, label %if, label %join
+
+if:
+  br label %join
+
+join:
+  %phi = phi ptr [ %a, %if ], [ %ptr, %entry ]
+  %v = load i32, ptr %phi
+  ret i32 %v
+}
+
+@g2 = addrspace(1) constant [32 x i8] zeroinitializer
+
+define i32 @test2(i1 %cond, ptr %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    [[TMP0:%.*]] = phi ptr [ addrspacecast (ptr addrspace(1) @g2 to ptr), [[IF]] ], [ [[X:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[TMP0]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a = alloca [32 x i8]
+  call void @llvm.memcpy.p0.p1.i64(ptr %a, ptr addrspace(1) @g2, i64 32, i1 false)
+  br i1 %cond, label %if, label %join
+
+if:
+  br label %join
+
+join:
+  %phi = phi ptr [ %a, %if ], [ %x, %entry ]
+  %v = load i32, ptr %phi
+  ret i32 %v
+}
+
+declare void @llvm.memcpy.p5i8.p4i8.i64(ptr addrspace(5), ptr addrspace(4), i64, i1)
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+declare void @llvm.memcpy.p0.p1.i64(ptr, ptr addrspace(1), i64, i1)