Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -169,6 +169,22 @@
   return nullptr;
 }
 
+/// Returns true if \p V is dereferenceable for the size of the alloca \p AI.
+///
+/// Array allocations and allocated types whose store size is zero are rejected
+/// (returns false). Otherwise the answer is whatever
+/// isDereferenceableAndAlignedPointer() reports for that store size and \p AI's alignment.
+static bool isDereferenceableForAllocaSize(const Value *V, const AllocaInst *AI,
+                                           const DataLayout &DL) {
+  if (AI->isArrayAllocation())
+    return false;
+  uint64_t AllocaSize = DL.getTypeStoreSize(AI->getAllocatedType());
+  if (!AllocaSize)
+    return false;
+  return isDereferenceableAndAlignedPointer(V, AI->getAlignment(),
+                                            APInt(64, AllocaSize), DL);
+}
+
 static Instruction *simplifyAllocaArraySize(InstCombiner &IC, AllocaInst &AI) {
   // Check for array size of 1 (scalar allocation).
   if (!AI.isArrayAllocation()) {
@@ -390,7 +406,8 @@
     if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
       unsigned SourceAlign = getOrEnforceKnownAlignment(
           Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
-      if (AI.getAlignment() <= SourceAlign) {
+      if (AI.getAlignment() <= SourceAlign &&
+          isDereferenceableForAllocaSize(Copy->getSource(), &AI, DL)) {
         DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
         DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
         for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
Index: test/Transforms/InstCombine/memcpy-from-global.ll
===================================================================
--- test/Transforms/InstCombine/memcpy-from-global.ll
+++ test/Transforms/InstCombine/memcpy-from-global.ll
@@ -204,3 +204,36 @@
 ; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U], [2 x %U]* @H, i64 0, i64 1) to i8*))
   ret void
 }
+
+@bbb = local_unnamed_addr global [1000000 x i8] zeroinitializer, align 16
+@_ZL3KKK = internal unnamed_addr constant [3 x i8] c"\01\01\02", align 1
+
+; Should not replace alloca with global because of size mismatch.
+; The CHECK-NOT comes after the alloca CHECK so that it scans the function
+; body that follows the alloca rather than only the text preceding it.
+define void @test9_small_global() {
+; CHECK-LABEL: @test9_small_global(
+; CHECK: alloca [1000000 x i8]
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}@bbb,{{.*}}@_ZL3KKK,
+entry:
+  %cc = alloca [1000000 x i8], align 16
+  %cc.0..sroa_idx = getelementptr inbounds [1000000 x i8], [1000000 x i8]* %cc, i64 0, i64 0
+  %arraydecay = getelementptr inbounds [1000000 x i8], [1000000 x i8]* %cc, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZL3KKK, i32 0, i32 0), i64 3, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([1000000 x i8], [1000000 x i8]* @bbb, i32 0, i32 0), i8* %arraydecay, i64 1000000, i32 16, i1 false)
+  ret void
+}
+
+; Should replace alloca with global as they have exactly the same size.
+define void @test10_same_global() {
+; CHECK-LABEL: @test10_same_global(
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}@bbb,{{.*}}@_ZL3KKK,{{.*}}, i64 3,
+entry:
+  %cc = alloca [3 x i8], align 1
+  %cc.0..sroa_idx = getelementptr inbounds [3 x i8], [3 x i8]* %cc, i64 0, i64 0
+  %arraydecay = getelementptr inbounds [3 x i8], [3 x i8]* %cc, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZL3KKK, i32 0, i32 0), i64 3, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([1000000 x i8], [1000000 x i8]* @bbb, i32 0, i32 0), i8* %arraydecay, i64 3, i32 1, i1 false)
+  ret void
+}