Index: lib/CodeGen/CGObjC.cpp =================================================================== --- lib/CodeGen/CGObjC.cpp +++ lib/CodeGen/CGObjC.cpp @@ -1495,6 +1495,13 @@ // Fast enumeration state. QualType StateTy = CGM.getObjCFastEnumerationStateType(); Address StatePtr = CreateMemTemp(StateTy, "state.ptr"); + Address ForAllIdxAddr = + CreateMemTemp(getContext().UnsignedLongTy, "for.all.index.addr"); + Address ForAllCntAddr = + CreateMemTemp(getContext().UnsignedLongTy, "for.all.count.addr"); + Address InititialMutationsAddr = CreateMemTemp( + getContext().UnsignedLongTy, "forcooll.initial-mutations.addr"); + EmitNullInitialization(StatePtr, StateTy); // Number of elements in the items array. @@ -1565,6 +1572,9 @@ llvm::Value *zero = llvm::Constant::getNullValue(UnsignedLongLTy); + Address StateMutationsPtrPtr = Builder.CreateStructGEP( + StatePtr, 2, 2 * getPointerSize(), "mutationsptr.ptr"); + // If the limit pointer was zero to begin with, the collection is // empty; skip all this. Set the branch weight assuming this has the same // probability of exiting the loop as any other loop exit. @@ -1580,28 +1590,23 @@ // Save the initial mutations value. This is the value at an // address that was written into the state object by // countByEnumeratingWithState:objects:count:. - Address StateMutationsPtrPtr = Builder.CreateStructGEP( - StatePtr, 2, 2 * getPointerSize(), "mutationsptr.ptr"); llvm::Value *StateMutationsPtr = Builder.CreateLoad(StateMutationsPtrPtr, "mutationsptr"); llvm::Value *initialMutations = Builder.CreateAlignedLoad(StateMutationsPtr, getPointerAlign(), "forcoll.initial-mutations"); + Builder.CreateStore(initialMutations, InititialMutationsAddr); + + // Initialize index and buffer size. + Builder.CreateStore(zero, ForAllIdxAddr); + Builder.CreateStore(initialBufferLimit, ForAllCntAddr); // Start looping. This is the point we return to whenever we have a // fresh, non-empty batch of objects. llvm::BasicBlock *LoopBodyBB = createBasicBlock("forcoll.loopbody"); EmitBlock(LoopBodyBB); - // The current index into the buffer. - llvm::PHINode *index = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.index"); - index->addIncoming(zero, LoopInitBB); - - // The current buffer size. - llvm::PHINode *count = Builder.CreatePHI(UnsignedLongLTy, 3, "forcoll.count"); - count->addIncoming(initialBufferLimit, LoopInitBB); - incrementProfileCounter(&S); // Check whether the mutations value has changed from where it was @@ -1614,6 +1619,8 @@ llvm::BasicBlock *WasMutatedBB = createBasicBlock("forcoll.mutated"); llvm::BasicBlock *WasNotMutatedBB = createBasicBlock("forcoll.notmutated"); + initialMutations = + Builder.CreateLoad(InititialMutationsAddr, "forcoll.initial-mutations"); Builder.CreateCondBr(Builder.CreateICmpEQ(currentMutations, initialMutations), WasNotMutatedBB, WasMutatedBB); @@ -1668,6 +1675,7 @@ Builder.CreateLoad(StateItemsPtr, "stateitems"); // Fetch the value at the current index from the buffer. + auto *index = Builder.CreateLoad(ForAllIdxAddr, "for.all.index"); llvm::Value *CurrentItemPtr = Builder.CreateGEP(EnumStateItems, index, "currentitem.ptr"); llvm::Value *CurrentItem = @@ -1709,20 +1717,20 @@ llvm::BasicBlock *FetchMoreBB = createBasicBlock("forcoll.refetch"); // First we check in the local buffer. + index = Builder.CreateLoad(ForAllIdxAddr, "for.all.index"); llvm::Value *indexPlusOne = Builder.CreateAdd(index, llvm::ConstantInt::get(UnsignedLongLTy, 1)); + Builder.CreateStore(indexPlusOne, ForAllIdxAddr); // If we haven't overrun the buffer yet, we can continue. // Set the branch weights based on the simplifying assumption that this is // like a while-loop, i.e., ignoring that the false branch fetches more // elements and then returns to the loop. + auto *count = Builder.CreateLoad(ForAllCntAddr, "for.all.count"); Builder.CreateCondBr( Builder.CreateICmpULT(indexPlusOne, count), LoopBodyBB, FetchMoreBB, createProfileWeights(getProfileCount(S.getBody()), EntryCount)); - index->addIncoming(indexPlusOne, AfterBody.getBlock()); - count->addIncoming(count, AfterBody.getBlock()); - // Otherwise, we have to fetch more elements. EmitBlock(FetchMoreBB); @@ -1736,8 +1744,8 @@ llvm::Value *refetchCount = CountRV.getScalarVal(); // (note that the message send might split FetchMoreBB) - index->addIncoming(zero, Builder.GetInsertBlock()); - count->addIncoming(refetchCount, Builder.GetInsertBlock()); + Builder.CreateStore(zero, ForAllIdxAddr); + Builder.CreateStore(refetchCount, ForAllCntAddr); Builder.CreateCondBr(Builder.CreateICmpEQ(refetchCount, zero), EmptyBB, LoopBodyBB); Index: test/CodeGenObjC/arc-foreach.m =================================================================== --- test/CodeGenObjC/arc-foreach.m +++ test/CodeGenObjC/arc-foreach.m @@ -26,6 +26,9 @@ // CHECK-LP64: [[ARRAY:%.*]] = alloca [[ARRAY_T:%.*]]*, // CHECK-LP64-NEXT: [[X:%.*]] = alloca i8*, // CHECK-LP64-NEXT: [[STATE:%.*]] = alloca [[STATE_T:%.*]], +// CHECK-LP64-NEXT: [[INDEX:%.*]] = alloca [[INDEX_T:.*]], +// CHECK-LP64-NEXT: [[COUNT:%.*]] = alloca [[COUNT_T:.*]], +// CHECK-LP64-NEXT: [[MUTATION:%.*]] = alloca [[MUTATION_T:.*]], // CHECK-LP64-NEXT: [[BUFFER:%.*]] = alloca [16 x i8*], align 8 // CHECK-LP64-NEXT: [[BLOCK:%.*]] = alloca [[BLOCK_T:<{.*}>]], @@ -50,13 +53,29 @@ // CHECK-LP64-NEXT: [[T1:%.*]] = bitcast [[ARRAY_T]]* [[SAVED_ARRAY]] to i8* // CHECK-LP64-NEXT: [[SIZE:%.*]] = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, [[STATE_T]]*, [16 x i8*]*, i64)*)(i8* [[T1]], i8* [[T0]], [[STATE_T]]* [[STATE]], [16 x i8*]* [[BUFFER]], i64 16) +// CHECK-LP64-NEXT: [[MUTATIONPTRPTR:%.*]] = getelementptr inbounds [[STATE_T]], [[STATE_T]]* [[STATE]] + // Check for a nonzero result. // CHECK-LP64-NEXT: [[T0:%.*]] = icmp eq i64 [[SIZE]], 0 // CHECK-LP64-NEXT: br i1 [[T0]] +// Initialize count, index, and mutation. +// CHECK-LP64: [[MUTATIONPTR:%.*]] = load [[MUTATION_T]]*, [[MUTATION_T]]** [[MUTATIONPTRPTR]] +// CHECK-LP64: [[INITIALMUTATIONVAL:%.*]] = load [[MUTATION_T]], [[MUTATION_T]]* [[MUTATIONPTR]] +// CHECK-LP64: store [[MUTATION_T]] [[INITIALMUTATIONVAL]], [[MUTATION_T]]* [[MUTATION]] +// CHECK-LP64: store [[INDEX_T]] 0, [[INDEX_T]]* [[INDEX]] +// CHECK-LP64: store [[COUNT_T]] [[SIZE]], [[COUNT_T]]* [[COUNT]] + +// Check for mutation. +// CHECK-LP64: [[MUTATIONPTR:%.*]] = load [[MUTATION_T]]*, [[MUTATION_T]]** [[MUTATIONPTRPTR]] +// CHECK-LP64: [[MUTATIONSTATE:%.*]] = load [[MUTATION_T]], [[MUTATION_T]]* [[MUTATIONPTR]] +// CHECK-LP64: [[INITIALMUTATIONVAL:%.*]] = load [[MUTATION_T]], [[MUTATION_T]]* [[MUTATION]] +// CHECK-LP64: icmp eq [[MUTATION_T]] [[MUTATIONSTATE]], [[INITIALMUTATIONVAL]] + // CHECK-LP64: [[T0:%.*]] = getelementptr inbounds [[STATE_T]], [[STATE_T]]* [[STATE]], i32 0, i32 1 // CHECK-LP64-NEXT: [[T1:%.*]] = load i8**, i8*** [[T0]] -// CHECK-LP64-NEXT: [[T2:%.*]] = getelementptr i8*, i8** [[T1]], i64 +// CHECK-LP64-NEXT: [[INDEXVAL:%.*]] = load [[INDEX_T]], [[INDEX_T]]* [[INDEX]] +// CHECK-LP64-NEXT: [[T2:%.*]] = getelementptr i8*, i8** [[T1]], i64 [[INDEXVAL]] // CHECK-LP64-NEXT: [[T3:%.*]] = load i8*, i8** [[T2]] // CHECK-LP64-NEXT: store i8* [[T3]], i8** [[X]] @@ -69,9 +88,18 @@ // CHECK-LP64: call void @use_block( // CHECK-LP64-NEXT: call void @objc_storeStrong(i8** [[D0]], i8* null) +// Increment index. +// CHECK-LP64: [[INDEXVAL:%.*]] = load [[INDEX_T]], [[INDEX_T]]* [[INDEX]] +// CHECK-LP64: [[T0:%.*]] = add [[INDEX_T]] [[INDEXVAL]], 1 +// CHECK-LP64: store [[INDEX_T]] [[T0]], [[INDEX_T]]* [[INDEX]] +// CHECK-LP64: [[COUNTVAL:%.*]] = load [[COUNT_T]], [[COUNT_T]]* [[COUNT]] +// CHECK-LP64: icmp ult [[COUNT_T]] [[T0]], [[COUNTVAL]] + // CHECK-LP64: [[T0:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES_ // CHECK-LP64-NEXT: [[T1:%.*]] = bitcast [[ARRAY_T]]* [[SAVED_ARRAY]] to i8* // CHECK-LP64-NEXT: [[SIZE:%.*]] = call i64 bitcast (i8* (i8*, i8*, ...)* @objc_msgSend to i64 (i8*, i8*, [[STATE_T]]*, [16 x i8*]*, i64)*)(i8* [[T1]], i8* [[T0]], [[STATE_T]]* [[STATE]], [16 x i8*]* [[BUFFER]], i64 16) +// CHECK-LP64: store [[INDEX_T]] 0, [[INDEX_T]]* [[INDEX]] +// CHECK-LP64: store [[COUNT_T]] [[SIZE]], [[COUNT_T]]* [[COUNT]] // Release the array. // CHECK-LP64: [[T0:%.*]] = bitcast [[ARRAY_T]]* [[SAVED_ARRAY]] to i8* @@ -99,12 +127,16 @@ // CHECK-LP64: alloca [[ARRAY_T:%.*]]*, // CHECK-LP64-NEXT: [[X:%.*]] = alloca i8*, // CHECK-LP64-NEXT: [[STATE:%.*]] = alloca [[STATE_T:%.*]], +// CHECK-LP64-NEXT: [[INDEX:%.*]] = alloca [[INDEX_T:.*]], +// CHECK-LP64-NEXT: [[COUNT:%.*]] = alloca [[COUNT_T:.*]], +// CHECK-LP64-NEXT: [[MUTATION:%.*]] = alloca [[MUTATION_T:.*]], // CHECK-LP64-NEXT: alloca [16 x i8*], align 8 // CHECK-LP64-NEXT: [[BLOCK:%.*]] = alloca [[BLOCK_T:<{.*}>]], // CHECK-LP64: [[T0:%.*]] = getelementptr inbounds [[STATE_T]], [[STATE_T]]* [[STATE]], i32 0, i32 1 // CHECK-LP64-NEXT: [[T1:%.*]] = load i8**, i8*** [[T0]] -// CHECK-LP64-NEXT: [[T2:%.*]] = getelementptr i8*, i8** [[T1]], i64 +// CHECK-LP64-NEXT: [[INDEXVAL:%.*]] = load [[INDEX_T]], [[INDEX_T]]* [[INDEX]] +// CHECK-LP64-NEXT: [[T2:%.*]] = getelementptr i8*, i8** [[T1]], i64 [[INDEXVAL]] // CHECK-LP64-NEXT: [[T3:%.*]] = load i8*, i8** [[T2]] // CHECK-LP64-NEXT: call i8* @objc_initWeak(i8** [[X]], i8* [[T3]])