diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp --- a/llvm/lib/Transforms/Scalar/SROA.cpp +++ b/llvm/lib/Transforms/Scalar/SROA.cpp @@ -109,12 +109,29 @@ STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); STATISTIC(NumDeleted, "Number of instructions deleted"); STATISTIC(NumVectorized, "Number of vectorized aggregates"); +STATISTIC(NumAllocaSpills, "Number of alloca spills before pointer escapes"); +STATISTIC(NumAllocaReloads, + "Number of reloads of alloca after pointer escapes"); /// Hidden option to experiment with completely strict handling of inbounds /// GEPs. static cl::opt SROAStrictInbounds("sroa-strict-inbounds", cl::init(false), cl::Hidden); +/// Controls the profitability of promoting allocas that escape normal +/// promotion by spilling/reloading their contents into a helper alloca +/// around the escape points. The thresholds are `OR`'ed, i.e. if the alloca +/// has non-zero occupancy and +/// EITHER the alloca size is not greater than SROAEscapingAllocaMaxSize +/// OR the occupancy percentage is at least SROAEscapingAllocaMinOccupancy +/// then it is promoted. +static cl::opt + SROAEscapingAllocaMaxSize("sroa-escaping-alloca-max-size", cl::Hidden, + cl::init(32)); +static cl::opt + SROAEscapingAllocaMinOccupancy("sroa-escaping-alloca-min-occupancy", + cl::Hidden, cl::init(80)); + namespace { /// A custom IRBuilder inserter which prefixes all names, but only in @@ -221,7 +238,7 @@ class llvm::sroa::AllocaSlices { public: /// Construct the slices of a particular alloca. - AllocaSlices(const DataLayout &DL, AllocaInst &AI); + AllocaSlices(const DataLayout &DL, AllocaInst &AI, bool &Changed); /// Test whether a pointer to the allocation escapes our analysis. 
/// @@ -298,11 +315,18 @@ friend class AllocaSlices::SliceBuilder; + Instruction *fixupRewritableEscapes(AllocaInst &AI, bool &Changed); + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// Handle to alloca instruction to simplify method interfaces. AllocaInst &AI; #endif + /// Certain escaping uses of an alloca (non-capturing-ones) + /// do not prevent promotion, but we have to rewrite them + /// to make promotion possible. This records all such uses. + SmallVector> RewritableEscapes; + /// The instruction responsible for this alloca not having a known set /// of slices. /// @@ -1058,11 +1082,22 @@ void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); } + void visitCallBase(CallBase &CB) { + // doesNotCapture() is indexed by data operand, so a use as the callee + // (or an unknown offset) has to abort like any other unhandled user. + if (!IsOffsetKnown || !CB.isDataOperand(U) || + !CB.doesNotCapture(U->getOperandNo())) + return PI.setAborted(&CB); + // If we know that the callee does not retain the pointer, + // then it does not prevent SROA, although we have to work around this. + AS.RewritableEscapes.emplace_back(U, Offset); + } + /// Disable SROA entirely if there are unhandled users of the alloca. void visitInstruction(Instruction &I) { PI.setAborted(&I); } }; -AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI) +AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI, bool &Changed) : #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) AI(AI), @@ -1079,6 +1111,77 @@ return; } + // We may have found that the pointer to the AI escapes, but isn't captured. + if (!RewritableEscapes.empty()) { + LLVM_DEBUG(dbgs() << "Alloca is escaped by calls! 
Original slices:\n"); + LLVM_DEBUG(print(dbgs())); + LLVM_DEBUG(dbgs() << "Escapes:\n"; for (const auto &E + : RewritableEscapes) dbgs() + << " " << *E.first->getUser() << "\n";); + + auto IsProfitableToTransform = [&]() { + unsigned AllocatedBytes = *AI.getAllocationSizeInBits(DL) / 8; + APInt LoadedBytes(AllocatedBytes, 0); + APInt StoredBytes(AllocatedBytes, 0); + for (const Slice &S : Slices) { + if (S.isDead()) + continue; + auto *I = cast(S.getUse()->getUser()); + switch (unsigned Opc = I->getOpcode()) { + default: + continue; + case Instruction::Load: + case Instruction::Store: { + APInt &Map = Opc == Instruction::Load ? LoadedBytes : StoredBytes; + Map.setBits(S.beginOffset(), S.endOffset()); + continue; + } + } + } + APInt LoadedAndStoredBytes = LoadedBytes; + LoadedAndStoredBytes &= StoredBytes; + unsigned UsedBytes = LoadedAndStoredBytes.countPopulation(); + unsigned OccupancyPct = divideCeil(100 * UsedBytes, AllocatedBytes); + LLVM_DEBUG(dbgs() << "Performing profitability check. "); + LLVM_DEBUG(dbgs() << "Alloca size: " << AllocatedBytes + << ", used bytes: " << UsedBytes + << ", occupancy: " << OccupancyPct << "%\n"); + bool IsProfitable = + OccupancyPct > 0 && (AllocatedBytes <= SROAEscapingAllocaMaxSize || + OccupancyPct >= SROAEscapingAllocaMinOccupancy); + LLVM_DEBUG(dbgs() << "Rule: occupancy > 0% && (alloca size <= " + << SROAEscapingAllocaMaxSize << " || occupancy >= " + << SROAEscapingAllocaMinOccupancy << "%), deeming it " + << (IsProfitable ? "profitable!" : "NOT profitable.") + << "\n"); + return IsProfitable; + }; + + // Are there any slices of an alloca that would benefit from the promotion? + // If the alloca is only used by escaping calls, and isn't loaded/stored to, + // then there is no point in promoting it. + LLVM_DEBUG(dbgs() << "Can rewrite escapes and make alloca promoteable.\n"); + if (!IsProfitableToTransform()) { + // Backtrack, and pretend that we aborted at the first escape. 
+ LLVM_DEBUG(dbgs() << "Profitability check failed, will not try to " + "promote due to the escapes.\n"); + PointerEscapingInstr = + cast(RewritableEscapes.front().first->getUser()); + return; + } + + // Rewrite these uses to not affect the promotion of the alloca. + Instruction *NewUsesOfAI = fixupRewritableEscapes(AI, Changed); + assert(NewUsesOfAI && "Returns non-null."); + + // Reanalyze new uses of an alloca. + LLVM_DEBUG(dbgs() << "Reanalyzing new loads/stores.\n"); + SliceBuilder::PtrInfo PtrI = PB.visitPtr(*NewUsesOfAI); + (void)PtrI; + assert(!PtrI.isEscaped() && !PtrI.isAborted() && + "Failed to analyze new memory operations?"); + } + llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); }); // Sort the uses. This arranges for the offsets to be in ascending order, @@ -3578,6 +3681,140 @@ } // end anonymous namespace +/// Make the non-capturing escapes recorded in RewritableEscapes harmless for +/// promotion: redirect each escaping use to a clone of \p AI, spilling \p AI +/// into the clone before the user and reloading it afterwards. Sets +/// \p Changed and returns the instruction all new uses of \p AI hang off. +Instruction *AllocaSlices::fixupRewritableEscapes(AllocaInst &AI, + bool &Changed) { + assert(!RewritableEscapes.empty() && + "Should not be called if there is nothing to rewrite."); + + LLVM_DEBUG(dbgs() << "Rewriting escapes to operate on a new helper alloca\n"); + + Changed = true; + + Instruction *OrigAlloca = &AI; + + // A cache of rebased pointers. + SmallDenseMap, Value *> RebasedPtrsCSE; + // A single instruction may consume multiple pointers into alloca, + // let's spill only once per instruction. + // Around which instructions have we performed spill/reload already? + SmallDenseSet SpillForInstAlreadyPerformed; + + // First, duplicate the alloca. This is fine to do, + // since we know that the old alloca should go away. + auto *CloneAlloca = cast(OrigAlloca->clone()); + CloneAlloca->setName(OrigAlloca->getName() + ".remat"); + CloneAlloca->insertAfter(OrigAlloca); + + IRBuilderTy Builder(OrigAlloca->getContext()); + const DataLayout &DL = AI.getModule()->getDataLayout(); + + // In order to simplify our life, let's base all the new uses of an AI, + // off of a single new use-def. This will simplify its reanalysis. 
+ OrigAlloca = GetElementPtrInst::CreateInBounds( + AI.getAllocatedType(), OrigAlloca, None, AI.getName() + ".new.uses", + CloneAlloca); + + // Both allocas share the original's alignment, which may be lower than the + // ABI alignment the IRBuilder would otherwise assume for these accesses. + const Align AllocaAlign = AI.getAlign(); + + // Spill the entire original alloca into our new clone alloca. + auto SpillOrigAllocaBefore = [&](Instruction *InsertBefore) { + Builder.SetInsertPoint(InsertBefore); + Value *SpilledValue = + Builder.CreateAlignedLoad(AI.getAllocatedType(), OrigAlloca, + AllocaAlign, AI.getName() + ".spill"); + Builder.CreateAlignedStore(SpilledValue, CloneAlloca, AllocaAlign); + ++NumAllocaSpills; + }; + + // Reload the entire original alloca from our new clone alloca. + auto ReloadOrigAllocaBefore = [&](Instruction *InsertBefore) { + Builder.SetInsertPoint(InsertBefore); + Value *ReloadedValue = + Builder.CreateAlignedLoad(AI.getAllocatedType(), CloneAlloca, + AllocaAlign, AI.getName() + ".reload"); + Builder.CreateAlignedStore(ReloadedValue, OrigAlloca, AllocaAlign); + ++NumAllocaReloads; + }; + + // Rebase this pointer into orig alloca to be based on clone alloca. + auto RebaseOrigAllocaPtr = + [this, AllocaBB = OrigAlloca->getParent(), &Builder, &RebasedPtrsCSE, + CloneAlloca, &DL](const std::pair &EscapingPtrUse) { + const APInt &Offset = EscapingPtrUse.second; + Use *U = EscapingPtrUse.first; + Type *PtrTy = U->get()->getType(); + Value *NewPtr; + + auto It = RebasedPtrsCSE.find({Offset, PtrTy}); + if (It != RebasedPtrsCSE.end()) + NewPtr = It->second; + else { + BasicBlock::iterator I = CloneAlloca->getIterator(); + while (isa(I)) { + ++I; + assert(I != AllocaBB->end() && "Block has no insertion point?"); + } + Builder.SetInsertPoint(&*I); + + NewPtr = getAdjustedPtr(Builder, DL, CloneAlloca, Offset, PtrTy, ""); + RebasedPtrsCSE[{Offset, PtrTy}] = NewPtr; + } + + auto *OldPtr = cast(*U); + U->set(NewPtr); + if (OldPtr->use_empty()) + DeadUsers.emplace_back(OldPtr); + }; + + // For each escaping pointer to the orig alloca. + for (const std::pair &RewritableEscape : RewritableEscapes) { + auto *EscapingUserInst = + cast(RewritableEscape.first->getUser()); + + // Rewrite this escaping pointer to be clone alloca-based. 
+ RebaseOrigAllocaPtr(RewritableEscape); + + // Did we already spill/reload around this instruction? + if (SpillForInstAlreadyPerformed.contains(EscapingUserInst)) + continue; + // We did not. Let's do that now. + + // Before said instruction, spill the current state + // of the orig alloca into the clone alloca. + SpillOrigAllocaBefore(EscapingUserInst); + + // And after the instruction, restore the state of orig alloca. + // Note that if the instruction is a terminator, + // we have to do that on *each* path. + if (!EscapingUserInst->isTerminator()) + ReloadOrigAllocaBefore(EscapingUserInst->getNextNode()); + else { + for (BasicBlock *SuccBB : successors(EscapingUserInst->getParent())) { + BasicBlock::iterator I = SuccBB->getFirstInsertionPt(); + assert(I != SuccBB->end() && "Successor block has no insertion point?"); + ReloadOrigAllocaBefore(&*I); + } + } + + // If we happen to revisit this instruction (perhaps it takes several + // pointers into this alloca), don't redo spill/reload. + SpillForInstAlreadyPerformed.insert(EscapingUserInst); + } + + // Resplit any FCA load/stores we may have introduced. + LLVM_DEBUG( + dbgs() << "Done rewriting escapes, making new loads/stores analyzable\n"); + AggLoadStoreRewriter(DL).rewrite(*OrigAlloca); + + return OrigAlloca; +} + /// Strip aggregate type wrapping. /// /// This removes no-op aggregate types wrapping an underlying type. It will @@ -4602,7 +4829,7 @@ Changed |= AggRewriter.rewrite(AI); // Build the slices using a recursive instruction-visiting builder. 
- AllocaSlices AS(DL, AI); + AllocaSlices AS(DL, AI, Changed); LLVM_DEBUG(AS.print(dbgs())); if (AS.isEscaped()) return Changed; diff --git a/llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll b/llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll --- a/llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll +++ b/llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll @@ -9,7 +9,7 @@ declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) -declare void @foo(i8* nocapture) +declare void @foo(i8*) define void @asan() sanitize_address { entry: diff --git a/llvm/test/Transforms/SROA/non-capturing-call.ll b/llvm/test/Transforms/SROA/non-capturing-call.ll --- a/llvm/test/Transforms/SROA/non-capturing-call.ll +++ b/llvm/test/Transforms/SROA/non-capturing-call.ll @@ -5,43 +5,41 @@ define i32 @alloca_used_in_call(i32* nocapture nonnull readonly %data, i64 %n) { ; CHECK-LABEL: @alloca_used_in_call( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; 
CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL]]) -; CHECK-NEXT: [[I1:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[I1]] +; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL_REMAT]]) +; CHECK-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: ret i32 [[RETVAL_RELOAD]] ; ; CHECK-OPAQUE-LABEL: @alloca_used_in_call( ; CHECK-OPAQUE-NEXT: entry: -; CHECK-OPAQUE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-OPAQUE-NEXT: br label [[LOOP:%.*]] ; CHECK-OPAQUE: loop: -; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-OPAQUE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-OPAQUE-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-OPAQUE-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-OPAQUE-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-OPAQUE-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK-OPAQUE: exit: -; CHECK-OPAQUE-NEXT: 
[[I0:%.*]] = call i32 @user_of_alloca(ptr nocapture nonnull [[RETVAL]]) -; CHECK-OPAQUE-NEXT: [[I1:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-OPAQUE-NEXT: ret i32 [[I1]] +; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(ptr nocapture nonnull [[RETVAL_REMAT]]) +; CHECK-OPAQUE-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, ptr [[RETVAL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: ret i32 [[RETVAL_RELOAD]] ; entry: %retval = alloca i32, align 4 @@ -131,45 +129,41 @@ define i32 @alloca_with_gep_used_in_call(i32* nocapture nonnull readonly %data, i64 %n) { ; CHECK-LABEL: @alloca_with_gep_used_in_call( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[RETVAL]], i32 0 -; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(i32* nocapture 
nonnull [[GEP]]) -; CHECK-NEXT: [[I1:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[I1]] +; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL_REMAT]]) +; CHECK-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: ret i32 [[RETVAL_RELOAD]] ; ; CHECK-OPAQUE-LABEL: @alloca_with_gep_used_in_call( ; CHECK-OPAQUE-NEXT: entry: -; CHECK-OPAQUE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-OPAQUE-NEXT: br label [[LOOP:%.*]] ; CHECK-OPAQUE: loop: -; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-OPAQUE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-OPAQUE-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-OPAQUE-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-OPAQUE-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-OPAQUE-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK-OPAQUE: exit: -; CHECK-OPAQUE-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[RETVAL]], i32 0 -; CHECK-OPAQUE-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(ptr nocapture nonnull [[GEP]]) -; CHECK-OPAQUE-NEXT: [[I1:%.*]] = load i32, ptr [[RETVAL]], align 4 -; 
CHECK-OPAQUE-NEXT: ret i32 [[I1]] +; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(ptr nocapture nonnull [[RETVAL_REMAT]]) +; CHECK-OPAQUE-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, ptr [[RETVAL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: ret i32 [[RETVAL_RELOAD]] ; entry: %retval = alloca i32, align 4 @@ -260,59 +254,61 @@ define i32 @alloca_used_in_maybe_throwing_call(i32* nocapture nonnull readonly %data, i64 %n) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @alloca_used_in_maybe_throwing_call( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[I0:%.*]] = invoke i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL]]) +; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: [[I0:%.*]] = invoke i32 
@user_of_alloca(i32* nocapture nonnull [[RETVAL_REMAT]]) ; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[UW:%.*]] ; CHECK: cont: +; CHECK-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: uw: ; CHECK-NEXT: [[I1:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null +; CHECK-NEXT: [[RETVAL_RELOAD1:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[I2:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[I2]] +; CHECK-NEXT: [[RETVAL_1:%.*]] = phi i32 [ [[RETVAL_RELOAD]], [[CONT]] ], [ [[RETVAL_RELOAD1]], [[UW]] ] +; CHECK-NEXT: ret i32 [[RETVAL_1]] ; ; CHECK-OPAQUE-LABEL: @alloca_used_in_maybe_throwing_call( ; CHECK-OPAQUE-NEXT: entry: -; CHECK-OPAQUE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-OPAQUE-NEXT: br label [[LOOP:%.*]] ; CHECK-OPAQUE: loop: -; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-OPAQUE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-OPAQUE-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-OPAQUE-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-OPAQUE-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-OPAQUE-NEXT: br i1 
[[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK-OPAQUE: exit: -; CHECK-OPAQUE-NEXT: [[I0:%.*]] = invoke i32 @user_of_alloca(ptr nocapture nonnull [[RETVAL]]) +; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[I0:%.*]] = invoke i32 @user_of_alloca(ptr nocapture nonnull [[RETVAL_REMAT]]) ; CHECK-OPAQUE-NEXT: to label [[CONT:%.*]] unwind label [[UW:%.*]] ; CHECK-OPAQUE: cont: +; CHECK-OPAQUE-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, ptr [[RETVAL_REMAT]], align 4 ; CHECK-OPAQUE-NEXT: br label [[END:%.*]] ; CHECK-OPAQUE: uw: ; CHECK-OPAQUE-NEXT: [[I1:%.*]] = landingpad { ptr, i32 } ; CHECK-OPAQUE-NEXT: catch ptr null +; CHECK-OPAQUE-NEXT: [[RETVAL_RELOAD1:%.*]] = load i32, ptr [[RETVAL_REMAT]], align 4 ; CHECK-OPAQUE-NEXT: br label [[END]] ; CHECK-OPAQUE: end: -; CHECK-OPAQUE-NEXT: [[I2:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-OPAQUE-NEXT: ret i32 [[I2]] +; CHECK-OPAQUE-NEXT: [[RETVAL_1:%.*]] = phi i32 [ [[RETVAL_RELOAD]], [[CONT]] ], [ [[RETVAL_RELOAD1]], [[UW]] ] +; CHECK-OPAQUE-NEXT: ret i32 [[RETVAL_1]] ; entry: %retval = alloca i32, align 4 @@ -348,55 +344,55 @@ define i32 @alloca_used_in_maybe_throwing_call_with_same_dests(i32* nocapture nonnull readonly %data, i64 %n) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @alloca_used_in_maybe_throwing_call_with_same_dests( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* 
[[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[I0:%.*]] = invoke i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL]]) +; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: [[I0:%.*]] = invoke i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL_REMAT]]) ; CHECK-NEXT: to label [[END:%.*]] unwind label [[UW:%.*]] ; CHECK: uw: ; CHECK-NEXT: [[I1:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null +; CHECK-NEXT: [[RETVAL_RELOAD1:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[I2:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[I2]] +; CHECK-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: ret i32 [[RETVAL_RELOAD]] ; ; CHECK-OPAQUE-LABEL: @alloca_used_in_maybe_throwing_call_with_same_dests( ; CHECK-OPAQUE-NEXT: entry: -; CHECK-OPAQUE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-OPAQUE-NEXT: br label [[LOOP:%.*]] ; CHECK-OPAQUE: loop: -; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; 
CHECK-OPAQUE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-OPAQUE-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-OPAQUE-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-OPAQUE-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-OPAQUE-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK-OPAQUE: exit: -; CHECK-OPAQUE-NEXT: [[I0:%.*]] = invoke i32 @user_of_alloca(ptr nocapture nonnull [[RETVAL]]) +; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[I0:%.*]] = invoke i32 @user_of_alloca(ptr nocapture nonnull [[RETVAL_REMAT]]) ; CHECK-OPAQUE-NEXT: to label [[END:%.*]] unwind label [[UW:%.*]] ; CHECK-OPAQUE: uw: ; CHECK-OPAQUE-NEXT: [[I1:%.*]] = landingpad { ptr, i32 } ; CHECK-OPAQUE-NEXT: catch ptr null +; CHECK-OPAQUE-NEXT: [[RETVAL_RELOAD1:%.*]] = load i32, ptr [[RETVAL_REMAT]], align 4 ; CHECK-OPAQUE-NEXT: br label [[END]] ; CHECK-OPAQUE: end: -; CHECK-OPAQUE-NEXT: [[I2:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-OPAQUE-NEXT: ret i32 [[I2]] +; CHECK-OPAQUE-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, ptr [[RETVAL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: ret i32 [[RETVAL_RELOAD]] ; entry: %retval = alloca i32, align 4 @@ -429,60 +425,54 @@ define [2 x i32] @part_of_alloca_used_in_call(i32* nocapture nonnull readonly %data, i64 %n) { ; CHECK-LABEL: @part_of_alloca_used_in_call( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL_FULL:%.*]] = alloca [2 x i32], align 4 -; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: store i32 0, i32* [[DOTFCA_0_GEP]], align 4 
-; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: store i32 0, i32* [[DOTFCA_1_GEP]], align 4 -; CHECK-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 1 +; CHECK-NEXT: [[RETVAL_FULL_REMAT:%.*]] = alloca [2 x i32], align 4 +; CHECK-NEXT: [[SROA_IDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], i64 0, i64 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_FULL_SROA_4_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_SROA_4_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL]]) -; CHECK-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, i32* [[I1_FCA_0_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[I1_FCA_0_LOAD]], 0 -; CHECK-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: 
[[I1_FCA_1_LOAD:%.*]] = load i32, i32* [[I1_FCA_1_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1 +; CHECK-NEXT: [[RETVAL_FULL_SPILL_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 0, 0 +; CHECK-NEXT: [[RETVAL_FULL_SPILL_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[RETVAL_FULL_SPILL_FCA_0_INSERT]], i32 [[RDX_INC]], 1 +; CHECK-NEXT: store [2 x i32] [[RETVAL_FULL_SPILL_FCA_1_INSERT]], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(i32* nocapture nonnull [[SROA_IDX]]) +; CHECK-NEXT: [[RETVAL_FULL_RELOAD:%.*]] = load [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 0 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 1 +; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT]], 0 +; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT]], 1 ; CHECK-NEXT: ret [2 x i32] [[I1_FCA_1_INSERT]] ; ; CHECK-OPAQUE-LABEL: @part_of_alloca_used_in_call( ; CHECK-OPAQUE-NEXT: entry: -; CHECK-OPAQUE-NEXT: [[RETVAL_FULL:%.*]] = alloca [2 x i32], align 4 -; CHECK-OPAQUE-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[DOTFCA_0_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[DOTFCA_1_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_REMAT:%.*]] = alloca [2 x i32], align 4 +; CHECK-OPAQUE-NEXT: [[SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RETVAL_FULL_REMAT]], i64 4 ; 
CHECK-OPAQUE-NEXT: br label [[LOOP:%.*]] ; CHECK-OPAQUE: loop: -; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SROA_4_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-OPAQUE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-OPAQUE-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_SROA_4_0]], [[LD]] ; CHECK-OPAQUE-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-OPAQUE-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-OPAQUE-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK-OPAQUE: exit: -; CHECK-OPAQUE-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(ptr nocapture nonnull [[RETVAL]]) -; CHECK-OPAQUE-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-OPAQUE-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[I1_FCA_0_LOAD]], 0 -; CHECK-OPAQUE-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-OPAQUE-NEXT: [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SPILL_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 0, 0 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SPILL_FCA_1_INSERT:%.*]] = insertvalue [2 
x i32] [[RETVAL_FULL_SPILL_FCA_0_INSERT]], i32 [[RDX_INC]], 1 +; CHECK-OPAQUE-NEXT: store [2 x i32] [[RETVAL_FULL_SPILL_FCA_1_INSERT]], ptr [[RETVAL_FULL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[I0:%.*]] = call i32 @user_of_alloca(ptr nocapture nonnull [[SROA_IDX]]) +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD:%.*]] = load [2 x i32], ptr [[RETVAL_FULL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 0 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 1 +; CHECK-OPAQUE-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT]], 0 +; CHECK-OPAQUE-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT]], 1 ; CHECK-OPAQUE-NEXT: ret [2 x i32] [[I1_FCA_1_INSERT]] ; entry: @@ -511,62 +501,55 @@ define [2 x i32] @all_parts_of_alloca_used_in_call_with_multiple_args(i32* nocapture nonnull readonly %data, i64 %n) { ; CHECK-LABEL: @all_parts_of_alloca_used_in_call_with_multiple_args( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL_FULL:%.*]] = alloca [2 x i32], align 4 -; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: store i32 0, i32* [[DOTFCA_0_GEP]], align 4 -; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: store i32 0, i32* [[DOTFCA_1_GEP]], align 4 -; CHECK-NEXT: [[RETVAL_BASE:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 0 -; CHECK-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 1 +; CHECK-NEXT: [[RETVAL_FULL_REMAT:%.*]] = alloca [2 x i32], align 4 +; CHECK-NEXT: [[SROA_IDX1:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], i64 0, i64 1 +; CHECK-NEXT: [[SROA_IDX:%.*]] = 
getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], i64 0, i64 0 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_FULL_SROA_4_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_SROA_4_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* nocapture nonnull [[RETVAL]], i32* nocapture nonnull [[RETVAL_BASE]]) -; CHECK-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, i32* [[I1_FCA_0_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[I1_FCA_0_LOAD]], 0 -; CHECK-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: [[I1_FCA_1_LOAD:%.*]] = load i32, i32* [[I1_FCA_1_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1 +; CHECK-NEXT: [[RETVAL_FULL_SPILL_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 0, 0 +; CHECK-NEXT: [[RETVAL_FULL_SPILL_FCA_1_INSERT:%.*]] = insertvalue [2 x 
i32] [[RETVAL_FULL_SPILL_FCA_0_INSERT]], i32 [[RDX_INC]], 1 +; CHECK-NEXT: store [2 x i32] [[RETVAL_FULL_SPILL_FCA_1_INSERT]], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* nocapture nonnull [[SROA_IDX1]], i32* nocapture nonnull [[SROA_IDX]]) +; CHECK-NEXT: [[RETVAL_FULL_RELOAD:%.*]] = load [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 0 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 1 +; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT]], 0 +; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT]], 1 ; CHECK-NEXT: ret [2 x i32] [[I1_FCA_1_INSERT]] ; ; CHECK-OPAQUE-LABEL: @all_parts_of_alloca_used_in_call_with_multiple_args( ; CHECK-OPAQUE-NEXT: entry: -; CHECK-OPAQUE-NEXT: [[RETVAL_FULL:%.*]] = alloca [2 x i32], align 4 -; CHECK-OPAQUE-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[DOTFCA_0_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[DOTFCA_1_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[RETVAL_BASE:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 0 -; CHECK-OPAQUE-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_REMAT:%.*]] = alloca [2 x i32], align 4 +; CHECK-OPAQUE-NEXT: [[SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RETVAL_FULL_REMAT]], i64 4 ; CHECK-OPAQUE-NEXT: br label [[LOOP:%.*]] ; CHECK-OPAQUE: loop: -; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SROA_4_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-OPAQUE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-OPAQUE-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_SROA_4_0]], [[LD]] ; CHECK-OPAQUE-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-OPAQUE-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-OPAQUE-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK-OPAQUE: exit: -; CHECK-OPAQUE-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr nocapture nonnull [[RETVAL]], ptr nocapture nonnull [[RETVAL_BASE]]) -; CHECK-OPAQUE-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-OPAQUE-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[I1_FCA_0_LOAD]], 0 -; CHECK-OPAQUE-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-OPAQUE-NEXT: [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SPILL_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 0, 0 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SPILL_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[RETVAL_FULL_SPILL_FCA_0_INSERT]], i32 [[RDX_INC]], 1 +; 
CHECK-OPAQUE-NEXT: store [2 x i32] [[RETVAL_FULL_SPILL_FCA_1_INSERT]], ptr [[RETVAL_FULL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr nocapture nonnull [[SROA_IDX]], ptr nocapture nonnull [[RETVAL_FULL_REMAT]]) +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD:%.*]] = load [2 x i32], ptr [[RETVAL_FULL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 0 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 1 +; CHECK-OPAQUE-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT]], 0 +; CHECK-OPAQUE-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT]], 1 ; CHECK-OPAQUE-NEXT: ret [2 x i32] [[I1_FCA_1_INSERT]] ; entry: @@ -596,60 +579,54 @@ define [2 x i32] @part_of_alloca_used_in_call_with_multiple_args(i32* nocapture nonnull readonly %data, i64 %n) { ; CHECK-LABEL: @part_of_alloca_used_in_call_with_multiple_args( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL_FULL:%.*]] = alloca [2 x i32], align 4 -; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: store i32 0, i32* [[DOTFCA_0_GEP]], align 4 -; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: store i32 0, i32* [[DOTFCA_1_GEP]], align 4 -; CHECK-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 1 +; CHECK-NEXT: [[RETVAL_FULL_REMAT:%.*]] = alloca [2 x i32], align 4 +; CHECK-NEXT: [[SROA_IDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], i64 0, i64 1 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; 
CHECK-NEXT: [[RETVAL_FULL_SROA_4_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_SROA_4_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* nocapture nonnull [[RETVAL]], i32* nocapture nonnull [[RETVAL]]) -; CHECK-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, i32* [[I1_FCA_0_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[I1_FCA_0_LOAD]], 0 -; CHECK-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: [[I1_FCA_1_LOAD:%.*]] = load i32, i32* [[I1_FCA_1_GEP]], align 4 -; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1 +; CHECK-NEXT: [[RETVAL_FULL_SPILL_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 0, 0 +; CHECK-NEXT: [[RETVAL_FULL_SPILL_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[RETVAL_FULL_SPILL_FCA_0_INSERT]], i32 [[RDX_INC]], 1 +; CHECK-NEXT: store [2 x i32] [[RETVAL_FULL_SPILL_FCA_1_INSERT]], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* 
nocapture nonnull [[SROA_IDX]], i32* nocapture nonnull [[SROA_IDX]]) +; CHECK-NEXT: [[RETVAL_FULL_RELOAD:%.*]] = load [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 0 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 1 +; CHECK-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT]], 0 +; CHECK-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT]], 1 ; CHECK-NEXT: ret [2 x i32] [[I1_FCA_1_INSERT]] ; ; CHECK-OPAQUE-LABEL: @part_of_alloca_used_in_call_with_multiple_args( ; CHECK-OPAQUE-NEXT: entry: -; CHECK-OPAQUE-NEXT: [[RETVAL_FULL:%.*]] = alloca [2 x i32], align 4 -; CHECK-OPAQUE-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[DOTFCA_0_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[DOTFCA_1_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_REMAT:%.*]] = alloca [2 x i32], align 4 +; CHECK-OPAQUE-NEXT: [[SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RETVAL_FULL_REMAT]], i64 4 ; CHECK-OPAQUE-NEXT: br label [[LOOP:%.*]] ; CHECK-OPAQUE: loop: -; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SROA_4_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-OPAQUE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; 
CHECK-OPAQUE-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_SROA_4_0]], [[LD]] ; CHECK-OPAQUE-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-OPAQUE-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-OPAQUE-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK-OPAQUE: exit: -; CHECK-OPAQUE-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr nocapture nonnull [[RETVAL]], ptr nocapture nonnull [[RETVAL]]) -; CHECK-OPAQUE-NEXT: [[I1_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-OPAQUE-NEXT: [[I1_FCA_0_LOAD:%.*]] = load i32, ptr [[I1_FCA_0_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[I1_FCA_0_LOAD]], 0 -; CHECK-OPAQUE-NEXT: [[I1_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-OPAQUE-NEXT: [[I1_FCA_1_LOAD:%.*]] = load i32, ptr [[I1_FCA_1_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[I1_FCA_1_LOAD]], 1 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SPILL_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 0, 0 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SPILL_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[RETVAL_FULL_SPILL_FCA_0_INSERT]], i32 [[RDX_INC]], 1 +; CHECK-OPAQUE-NEXT: store [2 x i32] [[RETVAL_FULL_SPILL_FCA_1_INSERT]], ptr [[RETVAL_FULL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr nocapture nonnull [[SROA_IDX]], ptr nocapture nonnull [[SROA_IDX]]) +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD:%.*]] = load [2 x i32], ptr [[RETVAL_FULL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: 
[[RETVAL_FULL_RELOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 0 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 1 +; CHECK-OPAQUE-NEXT: [[I1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT]], 0 +; CHECK-OPAQUE-NEXT: [[I1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I1_FCA_0_INSERT]], i32 [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT]], 1 ; CHECK-OPAQUE-NEXT: ret [2 x i32] [[I1_FCA_1_INSERT]] ; entry: @@ -678,70 +655,75 @@ define [2 x i32] @all_parts_of_alloca_used_in_calls_with_multiple_args(i32* nocapture nonnull readonly %data, i64 %n) { ; CHECK-LABEL: @all_parts_of_alloca_used_in_calls_with_multiple_args( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL_FULL:%.*]] = alloca [2 x i32], align 4 +; CHECK-NEXT: [[RETVAL_FULL_REMAT:%.*]] = alloca [2 x i32], align 4 ; CHECK-NEXT: [[SOME_ANOTHER_ALLOCA_FULL:%.*]] = alloca [42 x i32], align 4 -; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: store i32 0, i32* [[DOTFCA_0_GEP]], align 4 -; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: store i32 0, i32* [[DOTFCA_1_GEP]], align 4 -; CHECK-NEXT: [[RETVAL_BASE:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 0 -; CHECK-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 1 +; CHECK-NEXT: [[SROA_IDX3:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], i64 0, i64 1 +; CHECK-NEXT: [[SROA_IDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], i64 0, i64 0 ; CHECK-NEXT: [[SOME_ANOTHER_ALLOCA:%.*]] = getelementptr inbounds [42 x i32], [42 x i32]* [[SOME_ANOTHER_ALLOCA_FULL]], i64 0, i64 0 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, 
[[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_FULL_SROA_6_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_SROA_6_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* nocapture nonnull [[RETVAL]], i32* nocapture nonnull [[RETVAL_BASE]]) -; CHECK-NEXT: [[I1:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* nocapture nonnull [[RETVAL_BASE]], i32* nocapture nonnull [[RETVAL]]) +; CHECK-NEXT: [[RETVAL_FULL_SPILL_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 0, 0 +; CHECK-NEXT: [[RETVAL_FULL_SPILL_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[RETVAL_FULL_SPILL_FCA_0_INSERT]], i32 [[RDX_INC]], 1 +; CHECK-NEXT: store [2 x i32] [[RETVAL_FULL_SPILL_FCA_1_INSERT]], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* nocapture nonnull [[SROA_IDX3]], i32* nocapture nonnull [[SROA_IDX]]) +; CHECK-NEXT: [[RETVAL_FULL_RELOAD:%.*]] = load [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 0 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] 
[[RETVAL_FULL_RELOAD]], 1 +; CHECK-NEXT: [[RETVAL_FULL_SPILL1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT]], 0 +; CHECK-NEXT: [[RETVAL_FULL_SPILL1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[RETVAL_FULL_SPILL1_FCA_0_INSERT]], i32 [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT]], 1 +; CHECK-NEXT: store [2 x i32] [[RETVAL_FULL_SPILL1_FCA_1_INSERT]], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[I1:%.*]] = call i32 @user_of_alloca_with_multiple_args(i32* nocapture nonnull [[SROA_IDX]], i32* nocapture nonnull [[SROA_IDX3]]) +; CHECK-NEXT: [[RETVAL_FULL_RELOAD2:%.*]] = load [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD2_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD2]], 0 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD2_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD2]], 1 ; CHECK-NEXT: [[I2:%.*]] = call i32 @capture_of_alloca(i32* [[SOME_ANOTHER_ALLOCA]]) -; CHECK-NEXT: [[I3_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: [[I3_FCA_0_LOAD:%.*]] = load i32, i32* [[I3_FCA_0_GEP]], align 4 -; CHECK-NEXT: [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[I3_FCA_0_LOAD]], 0 -; CHECK-NEXT: [[I3_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: [[I3_FCA_1_LOAD:%.*]] = load i32, i32* [[I3_FCA_1_GEP]], align 4 -; CHECK-NEXT: [[I3_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I3_FCA_0_INSERT]], i32 [[I3_FCA_1_LOAD]], 1 +; CHECK-NEXT: [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[RETVAL_FULL_RELOAD2_FCA_0_EXTRACT]], 0 +; CHECK-NEXT: [[I3_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I3_FCA_0_INSERT]], i32 [[RETVAL_FULL_RELOAD2_FCA_1_EXTRACT]], 1 ; CHECK-NEXT: ret [2 x i32] [[I3_FCA_1_INSERT]] ; ; CHECK-OPAQUE-LABEL: @all_parts_of_alloca_used_in_calls_with_multiple_args( ; CHECK-OPAQUE-NEXT: entry: -; CHECK-OPAQUE-NEXT: 
[[RETVAL_FULL:%.*]] = alloca [2 x i32], align 4 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_REMAT:%.*]] = alloca [2 x i32], align 4 ; CHECK-OPAQUE-NEXT: [[SOME_ANOTHER_ALLOCA_FULL:%.*]] = alloca [42 x i32], align 4 -; CHECK-OPAQUE-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[DOTFCA_0_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-OPAQUE-NEXT: store i32 0, ptr [[DOTFCA_1_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[RETVAL_BASE:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 0 -; CHECK-OPAQUE-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i64 0, i64 1 +; CHECK-OPAQUE-NEXT: [[SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[RETVAL_FULL_REMAT]], i64 4 ; CHECK-OPAQUE-NEXT: [[SOME_ANOTHER_ALLOCA:%.*]] = getelementptr inbounds [42 x i32], ptr [[SOME_ANOTHER_ALLOCA_FULL]], i64 0, i64 0 ; CHECK-OPAQUE-NEXT: br label [[LOOP:%.*]] ; CHECK-OPAQUE: loop: -; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SROA_6_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-OPAQUE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-OPAQUE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-OPAQUE-NEXT: [[LD:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX:%.*]] = load i32, ptr [[RETVAL]], align 4 -; CHECK-OPAQUE-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-OPAQUE-NEXT: store i32 [[RDX_INC]], ptr [[RETVAL]], align 4 +; CHECK-OPAQUE-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_SROA_6_0]], [[LD]] ; CHECK-OPAQUE-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-OPAQUE-NEXT: [[EXITCOND:%.*]] = icmp ne i64 
[[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-OPAQUE-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK-OPAQUE: exit: -; CHECK-OPAQUE-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr nocapture nonnull [[RETVAL]], ptr nocapture nonnull [[RETVAL_BASE]]) -; CHECK-OPAQUE-NEXT: [[I1:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr nocapture nonnull [[RETVAL_BASE]], ptr nocapture nonnull [[RETVAL]]) +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SPILL_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 0, 0 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SPILL_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[RETVAL_FULL_SPILL_FCA_0_INSERT]], i32 [[RDX_INC]], 1 +; CHECK-OPAQUE-NEXT: store [2 x i32] [[RETVAL_FULL_SPILL_FCA_1_INSERT]], ptr [[RETVAL_FULL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[I0:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr nocapture nonnull [[SROA_IDX]], ptr nocapture nonnull [[RETVAL_FULL_REMAT]]) +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD:%.*]] = load [2 x i32], ptr [[RETVAL_FULL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 0 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 1 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SPILL1_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT]], 0 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_SPILL1_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[RETVAL_FULL_SPILL1_FCA_0_INSERT]], i32 [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT]], 1 +; CHECK-OPAQUE-NEXT: store [2 x i32] [[RETVAL_FULL_SPILL1_FCA_1_INSERT]], ptr [[RETVAL_FULL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[I1:%.*]] = call i32 @user_of_alloca_with_multiple_args(ptr nocapture nonnull [[RETVAL_FULL_REMAT]], ptr nocapture nonnull [[SROA_IDX]]) +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD2:%.*]] = load [2 x i32], ptr [[RETVAL_FULL_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: 
[[RETVAL_FULL_RELOAD2_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD2]], 0 +; CHECK-OPAQUE-NEXT: [[RETVAL_FULL_RELOAD2_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD2]], 1 ; CHECK-OPAQUE-NEXT: [[I2:%.*]] = call i32 @capture_of_alloca(ptr [[SOME_ANOTHER_ALLOCA]]) -; CHECK-OPAQUE-NEXT: [[I3_FCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-OPAQUE-NEXT: [[I3_FCA_0_LOAD:%.*]] = load i32, ptr [[I3_FCA_0_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[I3_FCA_0_LOAD]], 0 -; CHECK-OPAQUE-NEXT: [[I3_FCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], ptr [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-OPAQUE-NEXT: [[I3_FCA_1_LOAD:%.*]] = load i32, ptr [[I3_FCA_1_GEP]], align 4 -; CHECK-OPAQUE-NEXT: [[I3_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I3_FCA_0_INSERT]], i32 [[I3_FCA_1_LOAD]], 1 +; CHECK-OPAQUE-NEXT: [[I3_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[RETVAL_FULL_RELOAD2_FCA_0_EXTRACT]], 0 +; CHECK-OPAQUE-NEXT: [[I3_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[I3_FCA_0_INSERT]], i32 [[RETVAL_FULL_RELOAD2_FCA_1_EXTRACT]], 1 ; CHECK-OPAQUE-NEXT: ret [2 x i32] [[I3_FCA_1_INSERT]] ; entry: @@ -799,31 +781,27 @@ define i64 @do_schedule_instrs_for_dce_after_fixups() { ; CHECK-LABEL: @do_schedule_instrs_for_dce_after_fixups( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[C:%.*]] = alloca i64, align 2 -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i64* [[C]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 1, i8* [[TMP0]]) -; CHECK-NEXT: store i64 0, i64* [[C]], align 4 -; CHECK-NEXT: [[ARRAYDECAY:%.*]] = bitcast i64* [[C]] to i32* +; CHECK-NEXT: [[C_REMAT:%.*]] = alloca i64, align 2 +; CHECK-NEXT: [[SROA_RAW_CAST:%.*]] = bitcast i64* [[C_REMAT]] to i8* +; CHECK-NEXT: [[SROA_RAW_IDX:%.*]] = getelementptr inbounds i8, i8* [[SROA_RAW_CAST]], i64 4 +; CHECK-NEXT: [[SROA_CAST:%.*]] = bitcast i8* [[SROA_RAW_IDX]] to i32* ; CHECK-NEXT: br label 
[[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, i32* [[ARRAYDECAY]], i64 1 -; CHECK-NEXT: [[TMP1:%.*]] = call i32 @user_of_alloca(i32* [[ADD_PTR]]) -; CHECK-NEXT: [[LD:%.*]] = load i64, i64* [[C]], align 4 -; CHECK-NEXT: ret i64 [[LD]] +; CHECK-NEXT: store i64 0, i64* [[C_REMAT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(i32* [[SROA_CAST]]) +; CHECK-NEXT: [[C_RELOAD:%.*]] = load i64, i64* [[C_REMAT]], align 4 +; CHECK-NEXT: ret i64 [[C_RELOAD]] ; ; CHECK-OPAQUE-LABEL: @do_schedule_instrs_for_dce_after_fixups( ; CHECK-OPAQUE-NEXT: entry: -; CHECK-OPAQUE-NEXT: [[C:%.*]] = alloca i64, align 2 -; CHECK-OPAQUE-NEXT: [[TMP0:%.*]] = bitcast ptr [[C]] to ptr -; CHECK-OPAQUE-NEXT: call void @llvm.lifetime.start.p0(i64 1, ptr [[TMP0]]) -; CHECK-OPAQUE-NEXT: store i64 0, ptr [[C]], align 4 -; CHECK-OPAQUE-NEXT: [[ARRAYDECAY:%.*]] = bitcast ptr [[C]] to ptr +; CHECK-OPAQUE-NEXT: [[C_REMAT:%.*]] = alloca i64, align 2 +; CHECK-OPAQUE-NEXT: [[SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[C_REMAT]], i64 4 ; CHECK-OPAQUE-NEXT: br label [[IF_END:%.*]] ; CHECK-OPAQUE: if.end: -; CHECK-OPAQUE-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i32, ptr [[ARRAYDECAY]], i64 1 -; CHECK-OPAQUE-NEXT: [[TMP1:%.*]] = call i32 @user_of_alloca(ptr [[ADD_PTR]]) -; CHECK-OPAQUE-NEXT: [[LD:%.*]] = load i64, ptr [[C]], align 4 -; CHECK-OPAQUE-NEXT: ret i64 [[LD]] +; CHECK-OPAQUE-NEXT: store i64 0, ptr [[C_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(ptr [[SROA_IDX]]) +; CHECK-OPAQUE-NEXT: [[C_RELOAD:%.*]] = load i64, ptr [[C_REMAT]], align 4 +; CHECK-OPAQUE-NEXT: ret i64 [[C_RELOAD]] ; entry: %c = alloca i64, align 2 @@ -885,19 +863,19 @@ define i8 @transform_load_and_store() { ; CHECK-LABEL: @transform_load_and_store( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[A:%.*]] = alloca i8, align 1 -; CHECK-NEXT: store i8 0, i8* [[A]], align 1 -; CHECK-NEXT: call void @byte_user_of_alloca(i8* [[A]]) -; 
CHECK-NEXT: [[R:%.*]] = load i8, i8* [[A]], align 1 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: [[A_REMAT:%.*]] = alloca i8, align 1 +; CHECK-NEXT: store i8 0, i8* [[A_REMAT]], align 1 +; CHECK-NEXT: call void @byte_user_of_alloca(i8* [[A_REMAT]]) +; CHECK-NEXT: [[A_RELOAD:%.*]] = load i8, i8* [[A_REMAT]], align 1 +; CHECK-NEXT: ret i8 [[A_RELOAD]] ; ; CHECK-OPAQUE-LABEL: @transform_load_and_store( ; CHECK-OPAQUE-NEXT: entry: -; CHECK-OPAQUE-NEXT: [[A:%.*]] = alloca i8, align 1 -; CHECK-OPAQUE-NEXT: store i8 0, ptr [[A]], align 1 -; CHECK-OPAQUE-NEXT: call void @byte_user_of_alloca(ptr [[A]]) -; CHECK-OPAQUE-NEXT: [[R:%.*]] = load i8, ptr [[A]], align 1 -; CHECK-OPAQUE-NEXT: ret i8 [[R]] +; CHECK-OPAQUE-NEXT: [[A_REMAT:%.*]] = alloca i8, align 1 +; CHECK-OPAQUE-NEXT: store i8 0, ptr [[A_REMAT]], align 1 +; CHECK-OPAQUE-NEXT: call void @byte_user_of_alloca(ptr [[A_REMAT]]) +; CHECK-OPAQUE-NEXT: [[A_RELOAD:%.*]] = load i8, ptr [[A_REMAT]], align 1 +; CHECK-OPAQUE-NEXT: ret i8 [[A_RELOAD]] ; entry: %a = alloca i8