diff --git a/llvm/lib/Transforms/Scalar/SROA.cpp b/llvm/lib/Transforms/Scalar/SROA.cpp
--- a/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -298,10 +298,22 @@
 
   friend class AllocaSlices::SliceBuilder;
 
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  /// Rewrite every recorded non-capturing escape so that it uses a clone of
+  /// the alloca instead of the alloca itself.
+  ///
+  /// \returns the instruction that all newly created accesses to the original
+  /// alloca are based on (so that they can be re-analyzed), or nullptr if
+  /// there was nothing to rewrite.
+  Instruction *fixupRewritableEscapes();
+
   /// Handle to alloca instruction to simplify method interfaces.
   AllocaInst &AI;
-#endif
+
+  /// Certain escaping uses of an alloca (non-capturing ones)
+  /// do not prevent promotion, but we have to rewrite them
+  /// to make promotion possible. This records all such uses,
+  /// each paired with the offset of the escaping pointer into the alloca.
+  SmallVector<std::pair<Use *, APInt>, 8> RewritableEscapes;
 
   /// The instruction responsible for this alloca not having a known set
   /// of slices.
@@ -1058,16 +1070,25 @@
 
   void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
 
+  /// A call that is known not to capture the pointer does not block SROA:
+  /// record the use so it can later be redirected to a clone of the alloca.
+  void visitCallBase(CallBase &CB) {
+    // The nocapture query below is only meaningful for data operands;
+    // any other use by the call (e.g. as the callee) aborts the analysis.
+    if (!IsOffsetKnown || !CB.isDataOperand(U) ||
+        !CB.doesNotCapture(U->getOperandNo()))
+      return PI.setAborted(&CB);
+    // If we know that the callee does not retain the pointer,
+    // then it does not prevent SROA, although we have to work around this.
+    AS.RewritableEscapes.emplace_back(U, Offset);
+  }
+
   /// Disable SROA entirely if there are unhandled users of the alloca.
   void visitInstruction(Instruction &I) { PI.setAborted(&I); }
 };
 
 AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
-    :
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
-      AI(AI),
-#endif
-      PointerEscapingInstr(nullptr) {
+    : AI(AI), PointerEscapingInstr(nullptr) {
   SliceBuilder PB(DL, AI, *this);
   SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
   if (PtrI.isEscaped() || PtrI.isAborted()) {
@@ -1079,6 +1100,16 @@
     return;
   }
 
+  // We may have found that the pointer to the AI escapes, but isn't captured
+  // in the process. Rewrite these uses so they don't block alloca promotion.
+  if (Instruction *NewUsesOfAI = fixupRewritableEscapes()) {
+    // Reanalyze new uses of an alloca.
+    SliceBuilder::PtrInfo NewPtrI = PB.visitPtr(*NewUsesOfAI);
+    (void)NewPtrI;
+    assert(!NewPtrI.isEscaped() && !NewPtrI.isAborted() &&
+           "Failed to analyze new memory operations?");
+  }
+
   llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
 
   // Sort the uses. This arranges for the offsets to be in ascending order,
@@ -3578,6 +3609,119 @@
 
 } // end anonymous namespace
 
+/// Redirect every recorded non-capturing escape of the alloca to a clone.
+///
+/// The alloca is cloned; around each escaping instruction its entire contents
+/// are spilled into the clone beforehand and reloaded from it afterwards,
+/// while the escaping pointer operands are rebased onto the clone, so the
+/// original alloca no longer escapes through those uses.
+///
+/// \returns the GEP that all newly created accesses to the original alloca
+/// are based on, or nullptr if no rewritable escapes were recorded.
+Instruction *AllocaSlices::fixupRewritableEscapes() {
+  if (RewritableEscapes.empty())
+    return nullptr;
+
+  Instruction *OrigAlloca = &AI;
+
+  // Let's regroup the escapes by the Instruction,
+  // in case a single instruction escapes the pointer more than once.
+  SmallMapVector<Instruction *, SmallVector<const std::pair<Use *, APInt> *, 1>,
+                 8>
+      RewritableEscapesMap;
+  for (const std::pair<Use *, APInt> &RewritableEscape : RewritableEscapes)
+    RewritableEscapesMap[cast<Instruction>(RewritableEscape.first->getUser())]
+        .emplace_back(&RewritableEscape);
+
+  // First, duplicate the alloca. This is fine to do,
+  // since we know that the old alloca should go away.
+  auto *CloneAlloca = cast<AllocaInst>(OrigAlloca->clone());
+  CloneAlloca->setName(OrigAlloca->getName() + ".remat");
+  CloneAlloca->insertAfter(OrigAlloca);
+
+  IRBuilderTy Builder(OrigAlloca->getContext());
+  const DataLayout &DL = OrigAlloca->getModule()->getDataLayout();
+
+  // In order to simplify our life, let's base all the new uses of an AI,
+  // off of a single new def-use. This will simplify its reanalysis.
+  OrigAlloca = GetElementPtrInst::CreateInBounds(
+      AI.getAllocatedType(), OrigAlloca, None, AI.getName() + ".new.uses",
+      CloneAlloca);
+
+  // Spill the entire original alloca into our new clone alloca.
+  // The alloca's own alignment is passed explicitly: without it the builder
+  // falls back to the type's ABI alignment, which may exceed what an
+  // under-aligned alloca guarantees.
+  auto SpillOrigAllocaBefore = [&](Instruction *InsertBefore) {
+    Builder.SetInsertPoint(InsertBefore);
+    Value *SpilledValue =
+        Builder.CreateAlignedLoad(AI.getAllocatedType(), OrigAlloca,
+                                  AI.getAlign(), AI.getName() + ".spill");
+    Builder.CreateAlignedStore(SpilledValue, CloneAlloca,
+                               CloneAlloca->getAlign());
+  };
+
+  // Reload the entire original alloca from our new clone alloca.
+  auto ReloadOrigAllocaBefore = [&](Instruction *InsertBefore) {
+    Builder.SetInsertPoint(InsertBefore);
+    Value *ReloadedValue =
+        Builder.CreateAlignedLoad(AI.getAllocatedType(), CloneAlloca,
+                                  CloneAlloca->getAlign(),
+                                  AI.getName() + ".reload");
+    Builder.CreateAlignedStore(ReloadedValue, OrigAlloca, AI.getAlign());
+  };
+
+  // Rebase this pointer into orig alloca to be based on clone alloca.
+  // DL is captured by reference: the lambda is only used within this
+  // function, so copying the DataLayout into the closure is pure overhead.
+  auto RebaseOrigAllocaPtr =
+      [CloneAlloca, &Builder,
+       &DL](const std::pair<Use *, APInt> *EscapingPtrUse) {
+        Use *U = EscapingPtrUse->first;
+        Builder.SetInsertPoint(cast<Instruction>(U->getUser()));
+        const APInt &Offset = EscapingPtrUse->second;
+        Type *PtrTy = U->get()->getType(); // FIXME: this isn't right.
+        Value *NewPtr =
+            getAdjustedPtr(Builder, DL, CloneAlloca, Offset, PtrTy, "");
+        U->set(NewPtr);
+      };
+
+  // For each rewritable instruction that escapes a pointer to the orig alloca.
+  for (const auto &RewritableEscape : RewritableEscapesMap) {
+    Instruction *EscapingUserInst = RewritableEscape.first;
+
+    // First, before said instruction, spill the current state
+    // of the orig alloca into the clone alloca.
+    SpillOrigAllocaBefore(EscapingUserInst);
+
+    // Then, for all the escaping ptrs, rewrite them to be clone alloca-based.
+    for (const std::pair<Use *, APInt> *EscapingPtrUse :
+         RewritableEscape.second)
+      RebaseOrigAllocaPtr(EscapingPtrUse);
+
+    // And after the instruction, restore the state of orig alloca.
+    // Note that if the instruction is a terminator,
+    // we have to do that on *each* path.
+    if (!EscapingUserInst->isTerminator())
+      ReloadOrigAllocaBefore(EscapingUserInst->getNextNode());
+    else {
+      // NOTE(review): the reload is placed at the top of each successor, so
+      // it also runs on paths reaching that block from other predecessors.
+      // Confirm that is safe, or split the edge first.
+      for (BasicBlock *SuccBB : successors(EscapingUserInst->getParent())) {
+        BasicBlock::iterator I = SuccBB->getFirstInsertionPt();
+        assert(I != SuccBB->end() && "Successor block has no insertion point?");
+        ReloadOrigAllocaBefore(&*I);
+      }
+    }
+  }
+
+  // Resplit any FCA load/stores we may have introduced.
+  AggLoadStoreRewriter(DL).rewrite(*OrigAlloca);
+
+  return OrigAlloca;
+}
+
 /// Strip aggregate type wrapping.
 ///
 /// This removes no-op aggregate types wrapping an underlying type. It will
diff --git a/llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll b/llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll
--- a/llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll
+++ b/llvm/test/Transforms/PhaseOrdering/lifetime-sanitizer.ll
@@ -9,7 +9,7 @@
 
 declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture)
 declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture)
-declare void @foo(i8* nocapture)
+declare void @foo(i8*)
 
 define void @asan() sanitize_address {
 entry:
diff --git a/llvm/test/Transforms/SROA/non-capturing-call.ll b/llvm/test/Transforms/SROA/non-capturing-call.ll
--- a/llvm/test/Transforms/SROA/non-capturing-call.ll
+++ b/llvm/test/Transforms/SROA/non-capturing-call.ll
@@ -5,23 +5,22 @@
 define i32 @alloca_used_in_call(i32* nocapture nonnull readonly %data, i64 %n) {
 ; CHECK-LABEL: @alloca_used_in_call(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4
+; CHECK-NEXT:
[[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL_REMAT]]) +; CHECK-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: ret i32 [[RETVAL_RELOAD]] ; entry: %retval = alloca i32, align 4 @@ -91,24 +90,22 @@ define i32 @alloca_with_gep_used_in_call(i32* nocapture nonnull readonly %data, i64 %n) { ; CHECK-LABEL: @alloca_with_gep_used_in_call( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], 
[ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[GEP:%.*]] = getelementptr i32, i32* [[RETVAL]], i32 0 -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(i32* nocapture nonnull [[GEP]]) -; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[TMP1]] +; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL_REMAT]]) +; CHECK-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: ret i32 [[RETVAL_RELOAD]] ; entry: %retval = alloca i32, align 4 @@ -179,31 +176,32 @@ define i32 @alloca_used_in_maybe_throwing_call(i32* nocapture nonnull readonly %data, i64 %n) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @alloca_used_in_maybe_throwing_call( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 
[[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[TMP0:%.*]] = invoke i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL]]) +; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = invoke i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL_REMAT]]) ; CHECK-NEXT: to label [[CONT:%.*]] unwind label [[UW:%.*]] ; CHECK: cont: +; CHECK-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: uw: ; CHECK-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null +; CHECK-NEXT: [[RETVAL_RELOAD1:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[RETVAL_1:%.*]] = phi i32 [ [[RETVAL_RELOAD]], [[CONT]] ], [ [[RETVAL_RELOAD1]], [[UW]] ] +; CHECK-NEXT: ret i32 [[RETVAL_1]] ; entry: %retval = alloca i32, align 4 @@ -239,29 +237,29 @@ define i32 @alloca_used_in_maybe_throwing_call_with_same_dests(i32* nocapture nonnull readonly %data, i64 %n) personality i8* bitcast (i32 (...)* 
@__gxx_personality_v0 to i8*) { ; CHECK-LABEL: @alloca_used_in_maybe_throwing_call_with_same_dests( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -; CHECK-NEXT: store i32 0, i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RETVAL_REMAT:%.*]] = alloca i32, align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[TMP0:%.*]] = invoke i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL]]) +; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL_REMAT]], align 4 +; CHECK-NEXT: [[TMP0:%.*]] = invoke i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL_REMAT]]) ; CHECK-NEXT: to label [[END:%.*]] unwind label [[UW:%.*]] ; CHECK: uw: ; CHECK-NEXT: [[TMP1:%.*]] = landingpad { i8*, i32 } ; CHECK-NEXT: catch i8* null +; CHECK-NEXT: [[RETVAL_RELOAD1:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: ret i32 [[TMP2]] +; CHECK-NEXT: [[RETVAL_RELOAD:%.*]] = load i32, i32* [[RETVAL_REMAT]], align 4 +; 
CHECK-NEXT: ret i32 [[RETVAL_RELOAD]] ; entry: %retval = alloca i32, align 4 @@ -294,31 +292,28 @@ define [2 x i32] @part_of_alloca_used_in_call(i32* nocapture nonnull readonly %data, i64 %n) { ; CHECK-LABEL: @part_of_alloca_used_in_call( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[RETVAL_FULL:%.*]] = alloca [2 x i32], align 4 -; CHECK-NEXT: [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: store i32 0, i32* [[DOTFCA_0_GEP]], align 4 -; CHECK-NEXT: [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: store i32 0, i32* [[DOTFCA_1_GEP]], align 4 -; CHECK-NEXT: [[RETVAL:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i64 0, i64 1 +; CHECK-NEXT: [[RETVAL_FULL_REMAT:%.*]] = alloca [2 x i32], align 4 ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RETVAL_FULL_SROA_4_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[RDX_INC:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[DATA:%.*]], i64 [[INDVARS_IV]] ; CHECK-NEXT: [[LD:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[RDX:%.*]] = load i32, i32* [[RETVAL]], align 4 -; CHECK-NEXT: [[RDX_INC:%.*]] = add nsw i32 [[RDX]], [[LD]] -; CHECK-NEXT: store i32 [[RDX_INC]], i32* [[RETVAL]], align 4 +; CHECK-NEXT: [[RDX_INC]] = add nsw i32 [[RETVAL_FULL_SROA_4_0]], [[LD]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[N:%.*]] ; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(i32* nocapture nonnull [[RETVAL]]) -; CHECK-NEXT: [[DOTFCA_0_GEP1:%.*]] = getelementptr 
inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 0 -; CHECK-NEXT: [[DOTFCA_0_LOAD:%.*]] = load i32, i32* [[DOTFCA_0_GEP1]], align 4 -; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[DOTFCA_0_LOAD]], 0 -; CHECK-NEXT: [[DOTFCA_1_GEP2:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL]], i32 0, i32 1 -; CHECK-NEXT: [[DOTFCA_1_LOAD:%.*]] = load i32, i32* [[DOTFCA_1_GEP2]], align 4 -; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT]], i32 [[DOTFCA_1_LOAD]], 1 +; CHECK-NEXT: [[RETVAL_FULL_SPILL_FCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 0, 0 +; CHECK-NEXT: [[RETVAL_FULL_SPILL_FCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[RETVAL_FULL_SPILL_FCA_0_INSERT]], i32 [[RDX_INC]], 1 +; CHECK-NEXT: store [2 x i32] [[RETVAL_FULL_SPILL_FCA_1_INSERT]], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[SROA_IDX:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], i64 0, i64 1 +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @user_of_alloca(i32* nocapture nonnull [[SROA_IDX]]) +; CHECK-NEXT: [[RETVAL_FULL_RELOAD:%.*]] = load [2 x i32], [2 x i32]* [[RETVAL_FULL_REMAT]], align 4 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 0 +; CHECK-NEXT: [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT:%.*]] = extractvalue [2 x i32] [[RETVAL_FULL_RELOAD]], 1 +; CHECK-NEXT: [[DOTFCA_0_INSERT:%.*]] = insertvalue [2 x i32] undef, i32 [[RETVAL_FULL_RELOAD_FCA_0_EXTRACT]], 0 +; CHECK-NEXT: [[DOTFCA_1_INSERT:%.*]] = insertvalue [2 x i32] [[DOTFCA_0_INSERT]], i32 [[RETVAL_FULL_RELOAD_FCA_1_EXTRACT]], 1 ; CHECK-NEXT: ret [2 x i32] [[DOTFCA_1_INSERT]] ; entry: