Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -3368,10 +3368,18 @@
   // maybe it would make it more principled?
   SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
 
+  // Track alloca slices used by store instructions.
+  SmallDenseMap<StoreInst *, Slice *> Store2SliceMap;
+
   DEBUG(dbgs() << "  Searching for candidate loads and stores\n");
   for (auto &P : AS.partitions()) {
     for (Slice &S : P) {
       Instruction *I = cast<Instruction>(S.getUse()->getUser());
+
+      // Populate the Store2Slice map.
+      if (auto *SI = dyn_cast<StoreInst>(I))
+        Store2SliceMap.insert(std::make_pair(SI, &S));
+
       if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
         // If this was a load we have to track that it can't participate in any
         // pre-splitting!
@@ -3608,6 +3616,15 @@
       Value *StoreBasePtr = SI->getPointerOperand();
       IRB.SetInsertPoint(SI);
 
+      // We need to check if this store uses a slice of the alloca. If so, then
+      // we have to remember to build a new slice for each split store, and
+      // kill the original slice. Otherwise we risk leaving the partition
+      // structure in an inconsistent state.
+      auto Store2SliceMapI = Store2SliceMap.find(SI);
+      Slice *StoreSlice = nullptr;
+      if (Store2SliceMapI != Store2SliceMap.end())
+        StoreSlice = Store2SliceMapI->second;
+
       DEBUG(dbgs() << "    Splitting store of load: " << *SI << "\n");
 
       for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
@@ -3623,6 +3640,29 @@
             getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
         (void)PStore;
         DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
+
+        // Check if we have to build alloca slices for this split store.
+        if (StoreSlice) {
+          uint64_t BaseOffset = StoreSlice->beginOffset();
+          NewSlices.push_back(
+              Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
+                    &PStore->getOperandUse(PStore->getPointerOperandIndex()),
+                    /*IsSplittable*/ false));
+          DEBUG(dbgs() << "    new slice [" << NewSlices.back().beginOffset()
+                       << ", " << NewSlices.back().endOffset()
+                       << "): " << *PStore << "\n");
+        }
+      }
+
+      // Mark the original store as dead.
+      DeadInsts.insert(SI);
+
+      // Check if we have to kill the slice associated with the original store.
+      // For consistency, we only kill the original slice if we have built new
+      // slices for the split stores.
+      if (StoreSlice) {
+        StoreSlice->kill();
+        continue;
       }
 
       // We want to immediately iterate on any allocas impacted by splitting
@@ -3636,9 +3676,6 @@
               StoreBasePtr->stripInBoundsOffsets())) {
         Worklist.insert(OtherAI);
       }
-
-      // Mark the original store as dead.
-      DeadInsts.insert(SI);
     }
 
     // Save the split loads if there are deferred stores among the users.
Index: test/Transforms/SROA/basictest.ll
===================================================================
--- test/Transforms/SROA/basictest.ll
+++ test/Transforms/SROA/basictest.ll
@@ -1633,3 +1633,33 @@
   %load = load i16, i16* %bc2
   ret i16 %load
 }
+
+%struct.STest = type { %struct.SPos, %struct.SPos }
+%struct.SPos = type { float, float }
+
+define void @PR25873(%struct.STest* %outData) {
+; CHECK-LABEL: @PR25873(
+; CHECK: store i32 1123418112
+; CHECK: store i32 1139015680
+; CHECK: store i32 1123418112
+; CHECK: store i32 1139015680
+entry:
+  %tmpData = alloca %struct.STest, align 8
+  %0 = bitcast %struct.STest* %tmpData to i8*
+  call void @llvm.lifetime.start(i64 16, i8* %0)
+  %x = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 0, i32 0
+  store float 1.230000e+02, float* %x, align 8
+  %y = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 0, i32 1
+  store float 4.560000e+02, float* %y, align 4
+  %m_posB = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 1
+  %1 = bitcast %struct.STest* %tmpData to i64*
+  %2 = bitcast %struct.SPos* %m_posB to i64*
+  %3 = load i64, i64* %1, align 8
+  store i64 %3, i64* %2, align 8
+  %4 = bitcast %struct.STest* %outData to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* %0, i64 16, i32 4, i1 false)
+  call void @llvm.lifetime.end(i64 16, i8* %0)
+  ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
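
Note on the CHECK lines above: the i32 constants are the IEEE-754 bit patterns of the two floats the test stores into %tmpData (1123418112 is 0x42F60000, i.e. 123.0f; 1139015680 is 0x43E40000, i.e. 456.0f), so the test requires that both SPos members end up holding the same pair of values once SROA rewrites the i64 copy. A minimal sketch for reproducing the constants, assuming any C++11 compiler; the helper name bits_of is ours for illustration, not part of the patch:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Hypothetical helper (illustration only): returns the raw IEEE-754
    // bit pattern of a float as an unsigned 32-bit integer.
    static uint32_t bits_of(float F) {
      uint32_t Bits;
      std::memcpy(&Bits, &F, sizeof(Bits)); // bit-exact reinterpretation
      return Bits;
    }

    int main() {
      std::printf("%u\n", bits_of(123.0f)); // prints 1123418112 (0x42F60000)
      std::printf("%u\n", bits_of(456.0f)); // prints 1139015680 (0x43E40000)
      return 0;
    }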