Index: lib/Transforms/Scalar/SROA.cpp
===================================================================
--- lib/Transforms/Scalar/SROA.cpp
+++ lib/Transforms/Scalar/SROA.cpp
@@ -2241,19 +2241,48 @@
       assert(LI.getType()->getIntegerBitWidth() ==
              DL.getTypeStoreSizeInBits(LI.getType()) &&
              "Non-byte-multiple bit width");
-      // Move the insertion point just past the load so that we can refer to it.
-      IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI)));
-      // Create a placeholder value with the same type as LI to use as the
-      // basis for the new value. This allows us to replace the uses of LI with
-      // the computed value, and then replace the placeholder with LI, leaving
-      // LI only used for this computation.
-      Value *Placeholder
-        = new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
-      V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset,
-                        "insert");
-      LI.replaceAllUsesWith(V);
-      Placeholder->replaceAllUsesWith(&LI);
-      delete Placeholder;
+
+      StoreInst *SI;
+      // If this load is stored directly, slice the store.
+      if (LI.hasOneUse() && (SI = dyn_cast<StoreInst>(LI.user_back())) &&
+          !SI->isVolatile()) {
+        // Record this instruction for deletion.
+        Pass.DeadInsts.insert(SI);
+        Value *StorePtr = SI->getPointerOperand();
+        // Strip all inbounds GEPs and pointer casts to try to dig out any root
+        // alloca that should be re-examined after rewriting this instruction.
+        if (AllocaInst *AI =
+                dyn_cast<AllocaInst>(StorePtr->stripInBoundsOffsets())) {
+          assert(AI != &OldAI && AI != &NewAI && "Splittable transfers cannot "
+                                                 "reach the same alloca on "
+                                                 "both ends.");
+          Pass.Worklist.insert(AI);
+        }
+        unsigned DestAS = StorePtr->getType()->getPointerAddressSpace();
+        Type *DestPtrTy = V->getType()->getPointerTo(DestAS);
+
+        // Compute the relative offset for the dest pointer.
+        unsigned IntPtrWidth = DL.getPointerSizeInBits(DestAS);
+        APInt OtherOffset(IntPtrWidth, NewBeginOffset - BeginOffset);
+        Value *DestPtr = getAdjustedPtr(IRB, DL, StorePtr, OtherOffset,
+                                        DestPtrTy, StorePtr->getName() + ".");
+        IRB.SetInsertPoint(SI);
+        IRB.CreateAlignedStore(V, DestPtr, SI->getAlignment(), false);
+      } else {
+        // Move the insertion point just past the load so that we can refer to
+        // it.
+        IRB.SetInsertPoint(std::next(BasicBlock::iterator(&LI)));
+        // Create a placeholder value with the same type as LI to use as the
+        // basis for the new value. This allows us to replace the uses of LI
+        // with the computed value, and then replace the placeholder with LI,
+        // leaving LI only used for this computation.
+        Value *Placeholder =
+            new LoadInst(UndefValue::get(LI.getType()->getPointerTo()));
+        V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset, "insert");
+        LI.replaceAllUsesWith(V);
+        Placeholder->replaceAllUsesWith(&LI);
+        delete Placeholder;
+      }
     } else {
       LI.replaceAllUsesWith(V);
     }
Index: test/Transforms/SROA/slice-load-store.ll
===================================================================
--- /dev/null
+++ test/Transforms/SROA/slice-load-store.ll
@@ -0,0 +1,94 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; Test that when SROA splits a load whose value is stored to another pointer,
+; it does not reassemble the pieces with zext, shl, and, or instructions;
+; instead it can slice the store as well and generate simpler code.
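;
; For illustration only (not checked by FileCheck; the value names below are
; placeholders): without store slicing, the split pieces of the i64 load are
; merged back into a single integer with a sequence roughly like
;   %lo.ext = zext i32 %lo to i64
;   %hi.ext = zext i32 %hi to i64
;   %hi.shift = shl i64 %hi.ext, 32
;   %merged = or i64 %hi.shift, %lo.ext
;   store i64 %merged, i64* %dst
; before the single i64 store. With the store sliced as well, the rewrite is
; expected to emit two plain i32 stores at the matching offsets of the
; destination instead.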
+ +%struct.point.two.i32 = type { i32, i32 } + +define void @load_store_i64(i32 %a, %struct.point.two.i32* %ptr) { +; CHECK-LABEL: @load_store_i64( +; CHECK-NOT: zext +; CHECK-NOT: shl +; CHECK-NOT: and +; CHECK-NOT: or +; CHECK: store i32 %a +; CHECK: store i32 %a + %ref.tmp = alloca i64, align 8 + %tmpcast = bitcast i64* %ref.tmp to %struct.point.two.i32* + %x.i = getelementptr inbounds %struct.point.two.i32* %tmpcast, i64 0, i32 0 + store i32 %a, i32* %x.i, align 4 + %y.i = getelementptr inbounds %struct.point.two.i32* %tmpcast, i64 0, i32 1 + store i32 %a, i32* %y.i, align 4 + %1 = bitcast %struct.point.two.i32* %ptr to i64* + %2 = load i64* %ref.tmp, align 8 + store i64 %2, i64* %1, align 4 + ret void +} + +define void @load_store_i64_ab(i32 %a, i32 %b, %struct.point.two.i32* %ptr) { +; CHECK-LABEL: @load_store_i64_ab( +; CHECK-NOT: zext +; CHECK-NOT: shl +; CHECK-NOT: and +; CHECK-NOT: or +; CHECK: store i32 %{{a|b}} +; CHECK: store i32 %{{a|b}} + %ref.tmp = alloca i64, align 8 + %tmpcast = bitcast i64* %ref.tmp to %struct.point.two.i32* + %x.i = getelementptr inbounds %struct.point.two.i32* %tmpcast, i64 0, i32 0 + store i32 %a, i32* %x.i, align 4 + %y.i = getelementptr inbounds %struct.point.two.i32* %tmpcast, i64 0, i32 1 + store i32 %b, i32* %y.i, align 4 + %1 = bitcast %struct.point.two.i32* %ptr to i64* + %2 = load i64* %ref.tmp, align 8 + store i64 %2, i64* %1, align 4 + ret void +} + +define void @load_store_2xAlloca_i64(i32 %a, %struct.point.two.i32* %ptr) { +; CHECK-LABEL: @load_store_2xAlloca_i64( +; CHECK-NOT: zext +; CHECK-NOT: shl +; CHECK-NOT: and +; CHECK-NOT: or +; CHECK: store i32 %a +; CHECK: store i32 %a + %ref.tmp = alloca i64, align 8 + %tmpcast = bitcast i64* %ref.tmp to %struct.point.two.i32* + %x.i = getelementptr inbounds %struct.point.two.i32* %tmpcast, i64 0, i32 0 + store i32 %a, i32* %x.i, align 4 + %y.i = getelementptr inbounds %struct.point.two.i32* %tmpcast, i64 0, i32 1 + store i32 %a, i32* %y.i, align 4 + %1 = alloca i64, align 8 + %2 = load i64* %ref.tmp, align 8 + store i64 %2, i64* %1, align 4 + %3 = load i64* %1, align 8 + %4 = bitcast %struct.point.two.i32* %ptr to i64* + store i64 %3, i64* %4, align 4 + ret void +} + +define void @load_store_2xAlloca_i64_ab(i32 %a, i32 %b, %struct.point.two.i32* %ptr) { +; CHECK-LABEL: @load_store_2xAlloca_i64_ab( +; CHECK-NOT: zext +; CHECK-NOT: shl +; CHECK-NOT: and +; CHECK-NOT: or +; CHECK: store i32 %{{a|b}} +; CHECK: store i32 %{{a|b}} + %ref.tmp = alloca i64, align 8 + %tmpcast = bitcast i64* %ref.tmp to %struct.point.two.i32* + %x.i = getelementptr inbounds %struct.point.two.i32* %tmpcast, i64 0, i32 0 + store i32 %a, i32* %x.i, align 4 + %y.i = getelementptr inbounds %struct.point.two.i32* %tmpcast, i64 0, i32 1 + store i32 %b, i32* %y.i, align 4 + %1 = alloca i64, align 8 + %2 = load i64* %ref.tmp, align 8 + store i64 %2, i64* %1, align 4 + %3 = load i64* %1, align 8 + %4 = bitcast %struct.point.two.i32* %ptr to i64* + store i64 %3, i64* %4, align 4 + ret void +}