Index: include/llvm/Transforms/Utils/SimplifyLibCalls.h =================================================================== --- include/llvm/Transforms/Utils/SimplifyLibCalls.h +++ include/llvm/Transforms/Utils/SimplifyLibCalls.h @@ -124,6 +124,7 @@ Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B); Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B); Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B); + Value *optimizeIntrinsicMemSet(CallInst *CI, IRBuilder<> &B); Value *optimizeRealloc(CallInst *CI, IRBuilder<> &B); Value *optimizeWcslen(CallInst *CI, IRBuilder<> &B); // Wrapper for all String/Memory Library Call Optimizations Index: lib/Transforms/Utils/BuildLibCalls.cpp =================================================================== --- lib/Transforms/Utils/BuildLibCalls.cpp +++ lib/Transforms/Utils/BuildLibCalls.cpp @@ -701,6 +701,7 @@ // TODO: add LibFunc entries for: // case LibFunc_memset_pattern4: // case LibFunc_memset_pattern8: + case LibFunc_memset: case LibFunc_memset_pattern16: Changed |= setOnlyAccessesArgMemory(F); Changed |= setDoesNotCapture(F, 0); Index: lib/Transforms/Utils/SimplifyLibCalls.cpp =================================================================== --- lib/Transforms/Utils/SimplifyLibCalls.cpp +++ lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -21,6 +21,7 @@ #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/Utils/Local.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -844,12 +845,8 @@ if (!FillValue || FillValue->getZExtValue() != 0) return nullptr; - // TODO: We should handle the case where the malloc has more than one use. - // This is necessary to optimize common patterns such as when the result of - // the malloc is checked against null or when a memset intrinsic is used in - // place of a memset library call. 
auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0)); - if (!Malloc || !Malloc->hasOneUse()) + if (!Malloc) return nullptr; // Is the inner call really malloc()? @@ -862,18 +859,38 @@ Func != LibFunc_malloc) return nullptr; + // Check if malloc value is captured + if (PointerMayBeCaptured(Malloc, /* ReturnCaptures */ false, + /* StoreCaptures */ true)) + return nullptr; + // The memset must cover the same number of bytes that are malloc'd. if (Memset->getArgOperand(2) != Malloc->getArgOperand(0)) return nullptr; + Instruction *Inst; + bool MemsetFound = false; + for (Use &U : Malloc->uses()) { + Inst = cast<Instruction>(U.getUser()); + + if (Inst == Memset) + MemsetFound = true; + + // Remove stores between malloc and memset + if (MemsetFound && isa<StoreInst>(U.getUser())) { + if (Inst->getOperand(1) == Malloc) + Inst->eraseFromParent(); + } + } + // Replace the malloc with a calloc. We need the data layout to know what the - // actual size of a 'size_t' parameter is. + // actual size of a 'size_t' parameter is. 
B.SetInsertPoint(Malloc->getParent(), ++Malloc->getIterator()); const DataLayout &DL = Malloc->getModule()->getDataLayout(); IntegerType *SizeType = DL.getIntPtrType(B.GetInsertBlock()->getContext()); - Value *Calloc = emitCalloc(ConstantInt::get(SizeType, 1), - Malloc->getArgOperand(0), Malloc->getAttributes(), - B, TLI); + Value *Calloc = + emitCalloc(ConstantInt::get(SizeType, 1), Malloc->getArgOperand(0), + Malloc->getAttributes(), B, TLI); if (!Calloc) return nullptr; @@ -900,6 +917,14 @@ return nullptr; } +Value *LibCallSimplifier::optimizeIntrinsicMemSet(CallInst *CI, + IRBuilder<> &B) { + if (auto *Calloc = foldMallocMemset(CI, B, *TLI)) + return Calloc; + + return nullptr; +} + //===----------------------------------------------------------------------===// // Math Library Optimizations //===----------------------------------------------------------------------===// @@ -2255,7 +2280,8 @@ return optimizeLog(CI, Builder); case Intrinsic::sqrt: return optimizeSqrt(CI, Builder); - // TODO: Use foldMallocMemset() with memset intrinsic. 
+ case Intrinsic::memset: + return optimizeIntrinsicMemSet(CI, Builder); default: return nullptr; } Index: test/Transforms/InstCombine/memset-1.ll =================================================================== --- test/Transforms/InstCombine/memset-1.ll +++ test/Transforms/InstCombine/memset-1.ll @@ -5,7 +5,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" -declare i8* @memset(i8*, i32, i32) +declare i8* @memset(i8* nocapture, i32, i32) declare void @llvm.memset.p0i8.i32(i8* nocapture writeonly, i8, i32, i32, i1) declare noalias i8* @malloc(i32) #1 @@ -35,9 +35,8 @@ define i8* @malloc_and_memset_intrinsic(i32 %n) #0 { ; CHECK-LABEL: @malloc_and_memset_intrinsic( -; CHECK-NEXT: [[CALL:%.*]] = call i8* @malloc(i32 [[N:%.*]]) -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[CALL]], i8 0, i32 [[N]], i1 false) -; CHECK-NEXT: ret i8* [[CALL]] +; CHECK-NEXT: [[CALLOC:%.*]] = call i8* @calloc(i32 1, i32 [[N:%.*]]) +; CHECK-NEXT: ret i8* [[CALLOC]] ; %call = call i8* @malloc(i32 %n) call void @llvm.memset.p0i8.i32(i8* %call, i8 0, i32 %n, i32 1, i1 false) @@ -57,18 +56,16 @@ ret i8* %call2 } -; FIXME: memset(malloc(x), 0, x) -> calloc(1, x) -; This doesn't fire currently because the malloc has more than one use. +; Malloc has more than one use. 
define float* @pr25892(i32 %size) #0 { ; CHECK-LABEL: @pr25892( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CALL:%.*]] = tail call i8* @malloc(i32 [[SIZE:%.*]]) #0 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALL]], null +; CHECK-NEXT: [[CALLOC:%.*]] = call i8* @calloc(i32 1, i32 [[SIZE:%.*]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8* [[CALLOC]], null ; CHECK-NEXT: br i1 [[CMP]], label [[CLEANUP:%.*]], label [[IF_END:%.*]] ; CHECK: if.end: -; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALL]] to float* -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* nonnull align 1 [[CALL]], i8 0, i32 [[SIZE]], i1 false) +; CHECK-NEXT: [[BC:%.*]] = bitcast i8* [[CALLOC]] to float* ; CHECK-NEXT: br label [[CLEANUP]] ; CHECK: cleanup: ; CHECK-NEXT: [[RETVAL_0:%.*]] = phi float* [ [[BC]], [[IF_END]] ], [ null, [[ENTRY:%.*]] ] @@ -91,10 +88,8 @@ define i8* @buffer_is_modified_then_memset(i32 %size) { ; CHECK-LABEL: @buffer_is_modified_then_memset( -; CHECK-NEXT: [[PTR:%.*]] = tail call i8* @malloc(i32 [[SIZE:%.*]]) #0 -; CHECK-NEXT: store i8 1, i8* [[PTR]], align 1 -; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* align 1 [[PTR]], i8 0, i32 [[SIZE]], i1 false) -; CHECK-NEXT: ret i8* [[PTR]] +; CHECK-NEXT: [[CALLOC:%.*]] = call i8* @calloc(i32 1, i32 [[SIZE:%.*]]) +; CHECK-NEXT: ret i8* [[CALLOC]] ; %ptr = tail call i8* @malloc(i32 %size) #1 store i8 1, i8* %ptr ;; fdata[0] = 1; @@ -102,7 +97,20 @@ ret i8* %memset } +define i8* @buffer_preserve_store_after_memset(i32 %size) { +; CHECK-LABEL: @buffer_preserve_store_after_memset( +; CHECK-NEXT: [[CALLOC:%.*]] = call i8* @calloc(i32 1, i32 [[SIZE:%.*]]) +; CHECK-NEXT: store i8 1, i8* [[CALLOC]], align 1 +; CHECK-NEXT: ret i8* [[CALLOC]] +; + %ptr = tail call i8* @malloc(i32 %size) #1 + %memset = tail call i8* @memset(i8* nonnull %ptr, i32 0, i32 %size) #1 + store i8 1, i8* %ptr ;; fdata[0] = 1; + ret i8* %memset +} + + + attributes #0 = { nounwind ssp uwtable } attributes #1 = { nounwind } attributes #2 = { nounwind readnone } -