diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp --- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -53,14 +53,12 @@ const bool IsOffset = ValuePair.second; for (auto &U : ValuePair.first->uses()) { auto *I = cast(U.getUser()); - if (auto *LI = dyn_cast(I)) { // Ignore non-volatile loads, they are always ok. if (!LI->isSimple()) return false; continue; } - - if (isa(I) || isa(I)) { + if (isa(I)) { // If uses of the bitcast are ok, we are ok. ValuesToInspect.emplace_back(I, IsOffset); continue; @@ -106,9 +104,11 @@ } // If this is isn't our memcpy/memmove, reject it as something we can't - // handle. + // handle. Furthermore, if the destination of the memtransfer inst + // is a PHI node, reject it as well since the destination varies + // depending on the value of PHI. MemTransferInst *MI = dyn_cast(I); - if (!MI) + if (!MI || isa(MI->getDest())) return false; // If the transfer is using the alloca as a source of the transfer, then @@ -256,7 +256,15 @@ if (Load->isVolatile()) return false; Worklist.insert(Load); - } else if (isa(Inst) || isa(Inst)) { + } else if (auto *PHI = dyn_cast(Inst)) { + // Insert incoming values to the worklist only if they are instructions. + for (unsigned Idx = 0; Idx < PHI->getNumIncomingValues(); ++Idx) + if (auto *Inst = dyn_cast(PHI->getIncomingValue(Idx))) + Worklist.insert(Inst); + Worklist.insert(PHI); + if (!collectUsers(*PHI)) + return false; + } else if (isa(Inst)) { Worklist.insert(Inst); if (!collectUsers(*Inst)) return false; @@ -293,6 +301,50 @@ IC.InsertNewInstWith(NewI, *LT); IC.replaceInstUsesWith(*LT, NewI); WorkMap[LT] = NewI; + } else if (auto *PHI = dyn_cast(I)) { + + // Type of new PHI is the type of (replaced) incoming value + // of the old PHI. + auto *V = PHI->getIncomingValue(0); + Type *NewTy = V->getType(); + if (Worklist.contains(dyn_cast(V))) + NewTy = getReplacement(V)->getType(); + + // Create a new PHI instruction. Replace the incoming values + // if possible. If the value is not a result of an instruction, + // cast it into the type of the PHI and use that value as + // an operand of the new PHI. + auto *NewPHI = PHINode::Create(NewTy, PHI->getNumIncomingValues(), "", PHI); + bool PtrCastInserted = false; + for (unsigned int I = 0; I < PHI->getNumIncomingValues(); ++I) { + V = PHI->getIncomingValue(I); + auto *Inst = dyn_cast(V); + auto *Block = PHI->getIncomingBlock(I); + if (Inst) { + V = getReplacement(V); + assert(V && "Incoming value not replaced"); + } + if (V->getType()->getPointerAddressSpace() != + NewTy->getPointerAddressSpace()) { + V = AddrSpaceCastInst::CreatePointerBitCastOrAddrSpaceCast( + V, NewTy, V->getName(), NewPHI); + PtrCastInserted = true; + } + NewPHI->addIncoming(V, Block); + } + + if (PtrCastInserted) { + auto *OrigTypeCast = + AddrSpaceCastInst::CreatePointerBitCastOrAddrSpaceCast( + NewPHI, PHI->getType()); + IC.InsertNewInstWith(OrigTypeCast, *PHI); + OrigTypeCast->takeName(PHI); + WorkMap[PHI] = OrigTypeCast; + } else { + NewPHI->takeName(PHI); + WorkMap[PHI] = NewPHI; + } + } else if (auto *GEP = dyn_cast(I)) { auto *V = getReplacement(GEP->getPointerOperand()); assert(V && "Operand not replaced"); @@ -430,7 +482,6 @@ if (PtrReplacer.collectUsers(AI)) { for (Instruction *Delete : ToDelete) eraseInstFromFunction(*Delete); - Value *Cast = Builder.CreateBitCast(TheSrc, DestTy); PtrReplacer.replacePointer(AI, Cast); ++NumGlobalCopies; diff --git a/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/replace-alloca-phi.ll @@ -0,0 +1,256 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s + +; target triple="amdgcn-amd-amdhsa" +target datalayout="p5:32:32-A5" + +@g1 = constant [32 x i8] zeroinitializer +@g2 = addrspace(1) constant [32 x i8] zeroinitializer + +define i8 @remove_alloca_use_arg(i1 %cond) { +; CHECK-LABEL: @remove_alloca_use_arg( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: else: +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[IF]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 1), [[ELSE]] ] +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(1) + call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false) + br i1 %cond, label %if, label %else + +if: + %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2 + br label %sink + +else: + %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1 + br label %sink + +sink: + %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ] + %load = load i8, ptr addrspace(1) %ptr + ret i8 %load +} + +define i8 @volatile_load_keep_alloca(i1 %cond) { +; CHECK-LABEL: @volatile_load_keep_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(1) +; CHECK-NEXT: call void @llvm.memcpy.p1.p0.i64(ptr addrspace(1) noundef align 4 dereferenceable(256) [[ALLOCA]], ptr noundef nonnull align 16 dereferenceable(256) @g1, i64 256, i1 false) +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 1 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: else: +; CHECK-NEXT: [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 2 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ] +; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, ptr addrspace(1) [[PTR]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(1) + call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false) + br i1 %cond, label %if, label %else + +if: + %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1 + br label %sink + +else: + %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2 + br label %sink + +sink: + %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ] + %load = load volatile i8, ptr addrspace(1) %ptr + ret i8 %load +} + + +define i8 @no_memcpy_keep_alloca(i1 %cond) { +; CHECK-LABEL: @no_memcpy_keep_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [32 x i8], align 4, addrspace(1) +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[ELSE:%.*]] +; CHECK: if: +; CHECK-NEXT: [[VAL_IF:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 1 +; CHECK-NEXT: br label [[SINK:%.*]] +; CHECK: else: +; CHECK-NEXT: [[VAL_ELSE:%.*]] = getelementptr inbounds [32 x i8], ptr addrspace(1) [[ALLOCA]], i64 0, i64 2 +; CHECK-NEXT: br label [[SINK]] +; CHECK: sink: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr addrspace(1) [ [[VAL_IF]], [[IF]] ], [ [[VAL_ELSE]], [[ELSE]] ] +; CHECK-NEXT: [[LOAD:%.*]] = load volatile i8, ptr addrspace(1) [[PTR]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(1) + br i1 %cond, label %if, label %else + +if: + %val.if = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1 + br label %sink + +else: + %val.else = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2 + br label %sink + +sink: + %ptr = phi ptr addrspace(1) [ %val.if, %if ], [ %val.else, %else ] + %load = load volatile i8, ptr addrspace(1) %ptr + ret i8 %load +} + +define i8 @loop_phi_remove_alloca(i1 %cond) { +; CHECK-LABEL: @loop_phi_remove_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB_0:%.*]] +; CHECK: bb.0: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 1), [[ENTRY:%.*]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[BB_1:%.*]] ] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB_1]], label [[EXIT:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: br label [[BB_0]] +; CHECK: exit: +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(1) + call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false) + %val1 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1 + br label %bb.0 + +bb.0: + %ptr = phi ptr addrspace(1) [ %val1, %entry ], [ %val2, %bb.1 ] + br i1 %cond, label %bb.1, label %exit + +bb.1: + %val2 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2 + br label %bb.0 + +exit: + %load = load i8, ptr addrspace(1) %ptr + ret i8 %load +} + +define i32 @remove_alloca_ptr_arg(i1 %c, ptr %ptr) { +; CHECK-LABEL: @remove_alloca_ptr_arg( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ @g1, [[IF]] ], [ [[PTR:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[PHI]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %alloca = alloca [32 x i8] + call void @llvm.memcpy.p0.p0.i64(ptr %alloca, ptr @g1, i64 32, i1 false) + br i1 %c, label %if, label %join + +if: + br label %join + +join: + %phi = phi ptr [ %alloca, %if ], [ %ptr, %entry ] + %v = load i32, ptr %phi + ret i32 %v +} + +define i32 @addrspace_cast_remove_alloca(i1 %cond, ptr %x) { +; CHECK-LABEL: @addrspace_cast_remove_alloca( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[TMP0:%.*]] = phi ptr [ addrspacecast (ptr addrspace(1) @g2 to ptr), [[IF]] ], [ [[X:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[TMP0]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %a = alloca [32 x i8] + call void @llvm.memcpy.p0.p1.i64(ptr %a, ptr addrspace(1) @g2, i64 32, i1 false) + br i1 %cond, label %if, label %join + +if: + br label %join + +join: + %phi = phi ptr [ %a, %if ], [ %x, %entry ] + %v = load i32, ptr %phi + ret i32 %v +} + +define i32 @test_memcpy_after_phi(i1 %cond, ptr %ptr) { +; CHECK-LABEL: @test_memcpy_after_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[A:%.*]] = alloca [32 x i8], align 1 +; CHECK-NEXT: br i1 [[COND:%.*]], label [[IF:%.*]], label [[JOIN:%.*]] +; CHECK: if: +; CHECK-NEXT: br label [[JOIN]] +; CHECK: join: +; CHECK-NEXT: [[PHI:%.*]] = phi ptr [ [[A]], [[IF]] ], [ [[PTR:%.*]], [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 1 dereferenceable(32) [[PHI]], ptr noundef nonnull align 16 dereferenceable(32) @g1, i64 32, i1 false) +; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[PHI]], align 4 +; CHECK-NEXT: ret i32 [[V]] +; +entry: + %a = alloca [32 x i8] + br i1 %cond, label %if, label %join + +if: + br label %join + +join: + %phi = phi ptr [ %a, %if ], [ %ptr, %entry ] + call void @llvm.memcpy.p0.p0.i64(ptr %phi, ptr @g1, i64 32, i1 false) + %v = load i32, ptr %phi + ret i32 %v +} + +define i8 @loop_phi_late_memtransfer(i1 %cond) { +; CHECK-LABEL: @loop_phi_late_memtransfer( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BB_0:%.*]] +; CHECK: bb.0: +; CHECK-NEXT: [[PTR:%.*]] = phi ptr [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 1), [[ENTRY:%.*]] ], [ getelementptr inbounds ([32 x i8], ptr @g1, i64 0, i64 2), [[BB_1:%.*]] ] +; CHECK-NEXT: br i1 [[COND:%.*]], label [[BB_1]], label [[EXIT:%.*]] +; CHECK: bb.1: +; CHECK-NEXT: br label [[BB_0]] +; CHECK: exit: +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[PTR]], align 1 +; CHECK-NEXT: ret i8 [[LOAD]] +; +entry: + %alloca = alloca [32 x i8], align 4, addrspace(1) + %val1 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 1 + br label %bb.0 + +bb.0: + %ptr = phi ptr addrspace(1) [ %val1, %entry ], [ %val2, %bb.1 ] + br i1 %cond, label %bb.1, label %exit + +bb.1: + %val2 = getelementptr inbounds [32 x i8], ptr addrspace(1) %alloca, i32 0, i32 2 + call void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1) %alloca, ptr @g1, i64 256, i1 false) + br label %bb.0 + +exit: + %load = load i8, ptr addrspace(1) %ptr + ret i8 %load +} + +declare void @llvm.memcpy.p1i8.p0i8.i64(ptr addrspace(1), ptr, i64, i1) +declare void @llvm.memcpy.p0.p0.i64(ptr, ptr, i64, i1) +declare void @llvm.memcpy.p0.p1.i64(ptr, ptr addrspace(1), i64, i1)