Index: llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -1770,6 +1770,25 @@
   bool storeIsNoop(MemoryDef *Def, const MemoryLocation &DefLoc,
                    const Value *DefUO) {
     StoreInst *Store = dyn_cast<StoreInst>(Def->getMemoryInst());
+    MemSetInst *MemSet = dyn_cast<MemSetInst>(Def->getMemoryInst());
+    Constant *StoredConstant = nullptr;
+    if (Store)
+      StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
+    if (MemSet)
+      StoredConstant = dyn_cast<Constant>(MemSet->getValue());
+
+    if (StoredConstant && StoredConstant->isNullValue()) {
+      auto *DefUOInst = dyn_cast<Instruction>(DefUO);
+      if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) {
+        auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
+        // If UnderlyingDef is the clobbering access of Def, no instructions
+        // between them can modify the memory location.
+        auto *ClobberDef =
+            MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
+        return UnderlyingDef == ClobberDef;
+      }
+    }
+
     if (!Store)
       return false;
 
@@ -1817,18 +1836,6 @@
       }
     }
 
-    Constant *StoredConstant = dyn_cast<Constant>(Store->getOperand(0));
-    if (StoredConstant && StoredConstant->isNullValue()) {
-      auto *DefUOInst = dyn_cast<Instruction>(DefUO);
-      if (DefUOInst && isCallocLikeFn(DefUOInst, &TLI)) {
-        auto *UnderlyingDef = cast<MemoryDef>(MSSA.getMemoryAccess(DefUOInst));
-        // If UnderlyingDef is the clobbering access of Def, no instructions
-        // between them can modify the memory location.
-        auto *ClobberDef =
-            MSSA.getSkipSelfWalker()->getClobberingMemoryAccess(Def);
-        return UnderlyingDef == ClobberDef;
-      }
-    }
     return false;
   }
 };
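The functional change above: the stored-null check is hoisted above the `if (!Store)` early return and taught to look through MemSetInst, so DSE can now drop a zero memset (not only a zero store) when the underlying object comes from a calloc-like allocation and MemorySSA shows no clobber in between. A minimal sketch of the newly handled pattern in LLVM IR; the function name @sketch is hypothetical and not taken from the patch:

  declare i8* @calloc(i64, i64)
  declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i1)

  define i8* @sketch() {
    %p = tail call i8* @calloc(i64 100, i64 1)
    ; calloc already returns zero-filled memory, and the skip-self walker's
    ; getClobberingMemoryAccess(memset) reaches the calloc MemoryDef without
    ; hitting an intervening write, so storeIsNoop returns true and DSE
    ; deletes this memset.
    call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 100, i1 false)
    ret i8* %p
  }

The tests below exercise both sides of the clobber check: stores between the calloc and the memset that provably do not alias the memset's range are skipped by the walker, while a possibly-aliasing store keeps the memset alive.
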
Index: llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
===================================================================
--- llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
+++ llvm/test/Transforms/DeadStoreElimination/noop-stores.ll
@@ -325,7 +325,6 @@
 ; CHECK-LABEL: @zero_memset_after_calloc(
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
 ; CHECK-NEXT:    [[L0:%.*]] = bitcast i8* [[CALL]] to i32*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 40000, i1 false)
 ; CHECK-NEXT:    ret i8* [[CALL]]
 ;
   %call = tail call i8* @calloc(i64 10000, i64 4)
@@ -338,7 +337,6 @@
 ; CHECK-LABEL: @zero_memset_and_store_after_calloc(
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
 ; CHECK-NEXT:    [[L0:%.*]] = bitcast i8* [[CALL]] to i32*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 40000, i1 false)
 ; CHECK-NEXT:    ret i8* [[CALL]]
 ;
   %call = tail call i8* @calloc(i64 10000, i64 4)
@@ -348,46 +346,92 @@
   ret i8* %call
 }
 
-define i8* @zero_memset_after_calloc_inaccessiblememonly() {
-; CHECK-LABEL: @zero_memset_after_calloc_inaccessiblememonly(
-; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4) #[[ATTR6]]
+define i8* @partial_zero_memset_after_calloc() {
+; CHECK-LABEL: @partial_zero_memset_after_calloc(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
 ; CHECK-NEXT:    [[L0:%.*]] = bitcast i8* [[CALL]] to i32*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 40000, i1 false)
 ; CHECK-NEXT:    ret i8* [[CALL]]
 ;
-  %call = tail call i8* @calloc(i64 10000, i64 4) inaccessiblememonly
+  %call = tail call i8* @calloc(i64 10000, i64 4)
   %l0 = bitcast i8* %call to i32*
-  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 20, i1 false)
   ret i8* %call
 }
 
+define i8* @partial_zero_memset_and_store_after_calloc(i8 %v) {
+; CHECK-LABEL: @partial_zero_memset_and_store_after_calloc(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, i8* [[CALL]], i64 30
+; CHECK-NEXT:    store i8 [[V:%.*]], i8* [[GEP]], align 1
+; CHECK-NEXT:    [[L0:%.*]] = bitcast i8* [[CALL]] to i32*
+; CHECK-NEXT:    ret i8* [[CALL]]
+;
+  %call = tail call i8* @calloc(i64 10000, i64 4)
+  %gep = getelementptr inbounds i8, i8* %call, i64 30
+  store i8 %v, i8* %gep
+  %l0 = bitcast i8* %call to i32*
+  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 20, i1 false)
+  ret i8* %call
+}
 
-define i8* @nonzero_memset_after_calloc(i8 %v) {
-; CHECK-LABEL: @nonzero_memset_after_calloc(
+define i8* @zero_memset_and_store_with_dyn_index_after_calloc(i8 %v, i64 %idx) {
+; CHECK-LABEL: @zero_memset_and_store_with_dyn_index_after_calloc(
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
 ; CHECK-NEXT:    [[L0:%.*]] = bitcast i8* [[CALL]] to i32*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 [[V:%.*]], i64 40000, i1 false)
 ; CHECK-NEXT:    ret i8* [[CALL]]
 ;
   %call = tail call i8* @calloc(i64 10000, i64 4)
+  %gep = getelementptr i8, i8* %call, i64 %idx
+  store i8 %v, i8* %gep
   %l0 = bitcast i8* %call to i32*
-  call void @llvm.memset.p0i8.i64(i8* %call, i8 %v, i64 40000, i1 false)
+  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 false)
   ret i8* %call
 }
 
-define i8* @zero_memset_after_calloc_smaller_size() {
-; CHECK-LABEL: @zero_memset_after_calloc_smaller_size(
+define i8* @partial_zero_memset_and_store_with_dyn_index_after_calloc(i8 %v, i64 %idx) {
+; CHECK-LABEL: @partial_zero_memset_and_store_with_dyn_index_after_calloc(
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i8, i8* [[CALL]], i64 [[IDX:%.*]]
+; CHECK-NEXT:    store i8 [[V:%.*]], i8* [[GEP]], align 1
 ; CHECK-NEXT:    [[L0:%.*]] = bitcast i8* [[CALL]] to i32*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 20, i1 false)
 ; CHECK-NEXT:    ret i8* [[CALL]]
 ;
   %call = tail call i8* @calloc(i64 10000, i64 4)
+  %gep = getelementptr i8, i8* %call, i64 %idx
+  store i8 %v, i8* %gep
   %l0 = bitcast i8* %call to i32*
   call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 20, i1 false)
   ret i8* %call
 }
 
+define i8* @zero_memset_after_calloc_inaccessiblememonly() {
+; CHECK-LABEL: @zero_memset_after_calloc_inaccessiblememonly(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4) #[[ATTR6]]
+; CHECK-NEXT:    [[L0:%.*]] = bitcast i8* [[CALL]] to i32*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 0, i64 40000, i1 false)
+; CHECK-NEXT:    ret i8* [[CALL]]
+;
+  %call = tail call i8* @calloc(i64 10000, i64 4) inaccessiblememonly
+  %l0 = bitcast i8* %call to i32*
+  call void @llvm.memset.p0i8.i64(i8* %call, i8 0, i64 40000, i1 false)
+  ret i8* %call
+}
+
+
+define i8* @nonzero_memset_after_calloc(i8 %v) {
+; CHECK-LABEL: @nonzero_memset_after_calloc(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
+; CHECK-NEXT:    [[L0:%.*]] = bitcast i8* [[CALL]] to i32*
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* [[CALL]], i8 [[V:%.*]], i64 40000, i1 false)
+; CHECK-NEXT:    ret i8* [[CALL]]
+;
+  %call = tail call i8* @calloc(i64 10000, i64 4)
+  %l0 = bitcast i8* %call to i32*
+  call void @llvm.memset.p0i8.i64(i8* %call, i8 %v, i64 40000, i1 false)
+  ret i8* %call
+}
+
 ; PR11896
 ; The first memset is dead, because calloc provides zero-filled memory.
 ; TODO: This could be replaced with a call to malloc + memset_pattern16.
@@ -395,7 +439,6 @@
 ; CHECK-LABEL: @memset_pattern16_after_calloc(
 ; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @calloc(i64 10000, i64 4)
 ; CHECK-NEXT:    [[L0:%.*]] = bitcast i8* [[CALL]] to i32*
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 4 [[CALL]], i8 0, i64 40000, i1 false)
 ; CHECK-NEXT:    call void @memset_pattern16(i8* [[CALL]], i8* [[PAT:%.*]], i64 40000)
 ; CHECK-NEXT:    ret i8* [[CALL]]
 ;
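
A note on the two dynamic-index tests above. In @zero_memset_and_store_with_dyn_index_after_calloc the CHECK lines keep neither the store nor the memset, because two eliminations compose: the 40000-byte zero memset covers the entire calloc'ed object, so the variable-index store into that object is an ordinary dead store (any access outside the object would be UB) and goes first; the memset is then itself a no-op after calloc. In @partial_zero_memset_and_store_with_dyn_index_after_calloc the 20-byte memset neither overwrites the store nor can be proven disjoint from it, so both survive. A reduced, annotated sketch of the first case (sizes shrunk for readability, names hypothetical, reusing the declarations from the sketch above):

  define i8* @sketch_dyn_index(i8 %v, i64 %idx) {
    %p = tail call i8* @calloc(i64 100, i64 1)
    %gep = getelementptr i8, i8* %p, i64 %idx
    ; Dead first: the memset below rewrites the whole 100-byte object,
    ; so this store is fully overwritten wherever %idx lands in bounds.
    store i8 %v, i8* %gep
    ; Then a no-op: calloc already zero-filled the object and nothing
    ; else clobbers it, so the memset is deleted as well.
    call void @llvm.memset.p0i8.i64(i8* %p, i8 0, i64 100, i1 false)
    ret i8* %p
  }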