Index: lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -157,6 +157,35 @@
   return true;
 }
 
+/// Returns true if \p TheCopy is known to overwrite every byte of \p AI.
+/// Conservatively returns false when the alloca's array size or the copy
+/// length is not a compile-time constant, or when the alloca is zero-sized.
+static bool isCompletelyOverwritten(const AllocaInst &AI,
+                                    const MemTransferInst &TheCopy) {
+  const auto *ArraySize = dyn_cast<ConstantInt>(AI.getArraySize());
+  if (!ArraySize || ArraySize->isZero())
+    return false;
+
+  const auto *CopyLength = dyn_cast<ConstantInt>(TheCopy.getLength());
+  if (!CopyLength)
+    return false;
+
+  // An alloca reserves alloc-size bytes per element, so measure it with the
+  // alloc size; the store size would under-count padded element types.
+  const DataLayout &DL = AI.getModule()->getDataLayout();
+  const uint64_t ElemSize = DL.getTypeAllocSize(AI.getAllocatedType());
+  if (!ElemSize)
+    return false;
+
+  // CopySize >= ElemSize * NumElems, written as a division so the product
+  // cannot wrap around. A copy longer than the alloca can appear in (invalid)
+  // input IR, so it is handled as a normal case instead of being asserted
+  // against, which would crash the compiler.
+  const uint64_t NumElems = ArraySize->getZExtValue();
+  const uint64_t CopySize = CopyLength->getZExtValue();
+  return CopySize / ElemSize >= NumElems;
+}
+
 /// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
 /// modified by a copy from a constant global. If we can prove this, we can
 /// replace any uses of the alloca with uses of the global directly.
@@ -164,7 +193,9 @@
 isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                                SmallVectorImpl<Instruction *> &ToDelete) {
   MemTransferInst *TheCopy = nullptr;
-  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
+  // Only hand back the copy when it initializes the entire alloca.
+  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete) && TheCopy &&
+      isCompletelyOverwritten(*AI, *TheCopy))
     return TheCopy;
   return nullptr;
 }
Index: test/Transforms/InstCombine/memcpy-from-global.ll
===================================================================
--- test/Transforms/InstCombine/memcpy-from-global.ll
+++ test/Transforms/InstCombine/memcpy-from-global.ll
@@ -204,3 +204,37 @@
 ; CHECK-NEXT: call void @bar(i8* bitcast (%U* getelementptr inbounds ([2 x %U], [2 x %U]* @H, i64 0, i64 1) to i8*))
   ret void
 }
+
+@bbb = local_unnamed_addr global [1000000 x i8] zeroinitializer, align 16
+@_ZL3KKK = internal unnamed_addr constant [3 x i8] c"\01\01\02", align 1
+
+; Only 3 bytes of the 1000000-byte alloca are copied from the constant global,
+; so the alloca must stay: forwarding the global would make the 1000000-byte
+; copy into @bbb read from the 3-byte @_ZL3KKK.
+define void @test9_small_global() {
+; CHECK-LABEL: @test9_small_global(
+; CHECK: alloca [1000000 x i8]
+; CHECK-NOT: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}@bbb,{{.*}}@_ZL3KKK,
+entry:
+  %cc = alloca [1000000 x i8], align 16
+  %cc.0..sroa_idx = getelementptr inbounds [1000000 x i8], [1000000 x i8]* %cc, i64 0, i64 0
+  %arraydecay = getelementptr inbounds [1000000 x i8], [1000000 x i8]* %cc, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZL3KKK, i32 0, i32 0), i64 3, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([1000000 x i8], [1000000 x i8]* @bbb, i32 0, i32 0), i8* %arraydecay, i64 1000000, i32 16, i1 false)
+  ret void
+}
+
+; The 3-byte copy from the constant global covers the whole 3-byte alloca, so
+; the alloca is removed and the copy into @bbb reads from @_ZL3KKK directly.
+define void @test10_same_global() {
+; CHECK-LABEL: @test10_same_global(
+; CHECK-NOT: alloca
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64({{.*}}@bbb,{{.*}}@_ZL3KKK,{{.*}}, i64 3, i32 1, i1 false)
+entry:
+  %cc = alloca [3 x i8], align 1
+  %cc.0..sroa_idx = getelementptr inbounds [3 x i8], [3 x i8]* %cc, i64 0, i64 0
+  %arraydecay = getelementptr inbounds [3 x i8], [3 x i8]* %cc, i32 0, i32 0
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZL3KKK, i32 0, i32 0), i64 3, i32 1, i1 false)
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* getelementptr inbounds ([1000000 x i8], [1000000 x i8]* @bbb, i32 0, i32 0), i8* %arraydecay, i64 3, i32 1, i1 false)
+  ret void
+}