diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -52,14 +52,18 @@
     const auto [Value, IsOffset] = ValuesToInspect.pop_back_val();
     for (auto &U : Value->uses()) {
       auto *I = cast<Instruction>(U.getUser());
-
       if (auto *LI = dyn_cast<LoadInst>(I)) {
         // Ignore non-volatile loads, they are always ok.
         if (!LI->isSimple()) return false;
         continue;
       }
-
-      if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
+      if (isa<PHINode>(I)) {
+        // Look through the PHI. Its users are always inspected as offset
+        // uses (IsOffset = true), whatever the state of the incoming value.
+        ValuesToInspect.emplace_back(I, true);
+        continue;
+      }
+      if (isa<BitCastInst, AddrSpaceCastInst>(I)) {
         // If uses of the bitcast are ok, we are ok.
         ValuesToInspect.emplace_back(I, IsOffset);
         continue;
@@ -106,7 +110,8 @@
       }
 
       // If this is isn't our memcpy/memmove, reject it as something we can't
-      // handle.
+      // handle. If a PHI was already defined, then this MemTransferInst is a
+      // descendant of the PHI. Reject this case as well.
       MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
       if (!MI)
         return false;
@@ -241,7 +246,7 @@
 public:
   PointerReplacer(InstCombinerImpl &IC) : IC(IC) {}
 
-  bool collectUsers(Instruction &I);
+  bool collectUsers(Instruction &I, MemTransferInst *Copy);
   void replacePointer(Instruction &I, Value *V);
 
 private:
@@ -254,16 +259,35 @@
 };
 } // end anonymous namespace
 
-bool PointerReplacer::collectUsers(Instruction &I) {
+/// Recursively add the users of \p I to the worklist, looking through PHIs,
+/// GEPs and bitcasts. Returns false as soon as a user that cannot be handled
+/// (e.g. a volatile load) is found. \p Copy is the memory transfer whose
+/// source address space and destination are checked against PHI operands.
+bool PointerReplacer::collectUsers(Instruction &I, MemTransferInst *Copy) {
   for (auto *U : I.users()) {
     auto *Inst = cast<Instruction>(&*U);
     if (auto *Load = dyn_cast<LoadInst>(Inst)) {
       if (Load->isVolatile())
         return false;
       Worklist.insert(Load);
-    } else if (isa<GetElementPtrInst>(Inst) || isa<BitCastInst>(Inst)) {
+    } else if (auto *PHI = dyn_cast<PHINode>(Inst)) {
+      // Bail out if the PHI merges the destination of Copy directly while the
+      // copy source lives in a different address space than the PHI.
+      unsigned CopySrcAddrSpace = Copy->getSourceAddressSpace();
+      unsigned PHIAddrSpace = PHI->getType()->getPointerAddressSpace();
+      for (Value *Incoming : PHI->incoming_values()) {
+        if (CopySrcAddrSpace != PHIAddrSpace && Incoming == Copy->getDest())
+          return false;
+        if (auto *IncomingInst = dyn_cast<Instruction>(Incoming))
+          Worklist.insert(IncomingInst);
+      }
+
+      Worklist.insert(PHI);
+      if (!collectUsers(*PHI, Copy))
+        return false;
+    } else if (isa<GetElementPtrInst, BitCastInst>(Inst)) {
       Worklist.insert(Inst);
-      if (!collectUsers(*Inst))
+      if (!collectUsers(*Inst, Copy))
         return false;
     } else if (auto *MI = dyn_cast<MemTransferInst>(Inst)) {
       if (MI->isVolatile())
@@ -298,6 +322,16 @@
     IC.InsertNewInstWith(NewI, *LT);
     IC.replaceInstUsesWith(*LT, NewI);
     WorkMap[LT] = NewI;
+  } else if (auto *PHI = dyn_cast<PHINode>(I)) {
+    // Recreate the PHI over the replaced incoming values. The new type is
+    // taken from the replacement of the first incoming value.
+    Type *NewTy = getReplacement(PHI->getIncomingValue(0))->getType();
+    auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(),
+                                   PHI->getName(), PHI);
+    for (unsigned Idx = 0, E = PHI->getNumIncomingValues(); Idx != E; ++Idx)
+      NewPHI->addIncoming(getReplacement(PHI->getIncomingValue(Idx)),
+                          PHI->getIncomingBlock(Idx));
+    WorkMap[PHI] = NewPHI;
   } else if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
     auto *V = getReplacement(GEP->getPointerOperand());
     assert(V && "Operand not replaced");
@@ -432,10 +466,9 @@
       }
 
       PointerReplacer PtrReplacer(*this);
-      if (PtrReplacer.collectUsers(AI)) {
+      if (PtrReplacer.collectUsers(AI, Copy)) {
         for (Instruction *Delete : ToDelete)
           eraseInstFromFunction(*Delete);
-
         Value *Cast = Builder.CreateBitCast(TheSrc, DestTy);
         PtrReplacer.replacePointer(AI, Cast);
         ++NumGlobalCopies;
diff --git a/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll
@@ -0,0 +1,257 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s
+
+target datalayout="p5:32:32-A5"
+
+@g1 = constant [32 x i8] zeroinitializer
+@g2 = addrspace(1) constant [32 x i8] zeroinitializer
+
+define i8 @remove_alloca_use_arg(i1 %cond) {
+; CHECK-LABEL: @remove_alloca_use_arg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[IF]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 1), [[ELSE]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(1)
+  call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false)
+  br i1 %cond, label %if, label %else
+
+if:
+  %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2
+  br label %sink
+
+else:
+  %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1
+  br label %sink
+
+sink:
+  %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ]
+  %load = load i8, ptr addrspace(1) %ptr
+  ret i8 %load
+}
+
+define i8 @volatile_load_keep_alloca(i1 %cond) {
+; CHECK-LABEL: @volatile_load_keep_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(1)
+; CHECK-NEXT:    call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef align 4 dereferenceable(256) [[ALLOCA]], ptr noundef nonnull align 16 dereferenceable(256) @g1, i64 256, i1 false)
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 1
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 2
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, ptr addrspace(1) [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(1)
+  call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false)
+  br i1 %cond, label %if, label %else
+
+if:
+  %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1
+  br label %sink
+
+else:
+  %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2
+  br label %sink
+
+sink:
+  %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ]
+  %load = load volatile i8, ptr addrspace(1) %ptr
+  ret i8 %load
+}
+
+
+define i8 @no_memcpy_keep_alloca(i1 %cond) {
+; CHECK-LABEL: @no_memcpy_keep_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(1)
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 1
+; CHECK-NEXT:    br label [[SINK:%.*]]
+; CHECK:       else:
+; CHECK-NEXT:    [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 2
+; CHECK-NEXT:    br label [[SINK]]
+; CHECK:       sink:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr addrspace(1) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ]
+; CHECK-NEXT:    [[LOAD:%.*]] = load volatile i8, ptr addrspace(1) [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(1)
+  br i1 %cond, label %if, label %else
+
+if:
+  %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1
+  br label %sink
+
+else:
+  %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2
+  br label %sink
+
+sink:
+  %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ]
+  %load = load volatile i8, ptr addrspace(1) %ptr
+  ret i8 %load
+}
+
+define i8 @loop_phi_remove_alloca(i1 %cond) {
+; CHECK-LABEL: @loop_phi_remove_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB_0:%.*]]
+; CHECK:       bb.0:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 1), [[ENTRY:%.*]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[BB_1:%.*]] ]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB_1]], label [[EXIT:%.*]]
+; CHECK:       bb.1:
+; CHECK-NEXT:    br label [[BB_0]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(1)
+  call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false)
+  %val1 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1
+  br label %bb.0
+
+bb.0:
+  %ptr = phi ptr addrspace(1) [ %val1, %entry ], [ %val2, %bb.1 ]
+  br i1 %cond, label %bb.1, label %exit
+
+bb.1:
+  %val2 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2
+  br label %bb.0
+
+exit:
+  %load = load i8, ptr addrspace(1) %ptr
+  ret i8 %load
+}
+
+define i32 @remove_alloca_ptr_arg(i1 %c, ptr %ptr) {
+; CHECK-LABEL: @remove_alloca_ptr_arg(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ @g1, [[IF]] ], [ [[PTR:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[PHI]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %alloca = alloca [32 x i8]
+  call void @llvm.memcpy.p0.p0.i64(ptr %alloca, ptr @g1, i64 32, i1 false)
+  br i1 %c, label %if, label %join
+
+if:
+  br label %join
+
+join:
+  %phi = phi ptr [ %alloca, %if ], [ %ptr, %entry ]
+  %v = load i32, ptr %phi
+  ret i32 %v
+}
+
+define i8 @loop_phi_late_memtransfer(i1 %cond) {
+; CHECK-LABEL: @loop_phi_late_memtransfer(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[BB_0:%.*]]
+; CHECK:       bb.0:
+; CHECK-NEXT:    [[PTR:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 1), [[ENTRY:%.*]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[BB_1:%.*]] ]
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[BB_1]], label [[EXIT:%.*]]
+; CHECK:       bb.1:
+; CHECK-NEXT:    br label [[BB_0]]
+; CHECK:       exit:
+; CHECK-NEXT:    [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1
+; CHECK-NEXT:    ret i8 [[LOAD]]
+;
+entry:
+  %alloca = alloca [32 x i8], align 4, addrspace(1)
+  %val1 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1
+  br label %bb.0
+
+bb.0:
+  %ptr = phi ptr addrspace(1) [ %val1, %entry ], [ %val2, %bb.1 ]
+  br i1 %cond, label %bb.1, label %exit
+
+bb.1:
+  %val2 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2
+  call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false)
+  br label %bb.0
+
+exit:
+  %load = load i8, ptr addrspace(1) %ptr
+  ret i8 %load
+}
+
+define i32 @test_memcpy_after_phi(i1 %cond, ptr %ptr) {
+; CHECK-LABEL: @test_memcpy_after_phi(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca [32 x i8], align 1
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ [[A]], [[IF]] ], [ [[PTR:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(32) [[PHI]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false)
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[PHI]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a = alloca [32 x i8]
+  br i1 %cond, label %if, label %join
+
+if:
+  br label %join
+
+join:
+  %phi = phi ptr [ %a, %if ], [ %ptr, %entry ]
+  call void @llvm.memcpy.p0.p0.i64(ptr %phi, ptr @g1, i64 32, i1 false)
+  %v = load i32, ptr %phi
+  ret i32 %v
+}
+
+define i32 @addrspace_diff_keep_alloca(i1 %cond, ptr %x) {
+; CHECK-LABEL: @addrspace_diff_keep_alloca(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca [32 x i8], align 1
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p1.i64(ptr noundef nonnull align 1 dereferenceable(32) [[A]], ptr addrspace(1) noundef align 16 dereferenceable(32) @g2, i64 32, i1 false)
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]]
+; CHECK:       if:
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    [[PHI:%.*]] = phi ptr [ [[A]], [[IF]] ], [ [[X:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[V:%.*]] = load i32, ptr [[PHI]], align 4
+; CHECK-NEXT:    ret i32 [[V]]
+;
+entry:
+  %a = alloca [32 x i8]
+  call void @llvm.memcpy.p0.p1.i64(ptr %a, ptr addrspace(1) @g2, i64 32, i1 false)
+  br i1 %cond, label %if, label %join
+
+if:
+  br label %join
+
+join:
+  %phi = phi ptr [ %a, %if ], [ %x, %entry ]
+  %v = load i32, ptr %phi
+  ret i32 %v
+}
+
+declare void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1), ptr, i64, i1)
+declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1)
+declare void @llvm.memcpy.p0.p1.i64(ptr, ptr addrspace(1), i64, i1)