Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -835,19 +835,33 @@
 /// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
 static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,
                                const TargetLibraryInfo &TLI) {
   // This has to be a memset of zeros (bzero).
   auto *FillValue = dyn_cast<ConstantInt>(Memset->getArgOperand(1));
   if (!FillValue || FillValue->getZExtValue() != 0)
     return nullptr;
 
-  // TODO: We should handle the case where the malloc has more than one use.
-  // This is necessary to optimize common patterns such as when the result of
-  // the malloc is checked against null or when a memset intrinsic is used in
-  // place of a memset library call.
-  auto *Malloc = dyn_cast<CallInst>(Memset->getArgOperand(0));
-  if (!Malloc || !Malloc->hasOneUse())
+  // The destination of the memset must be the direct result of a call.
+  Value *Src = Memset->getArgOperand(0);
+  auto *Malloc = dyn_cast<CallInst>(Src);
+  if (!Malloc)
     return nullptr;
 
+  // If the malloc result has other users, only fold when none of them sits
+  // between the malloc and the memset in the malloc's basic block; such a user
+  // could read or write the allocation before it has been zeroed.
+  if (!Malloc->hasOneUse()) {
+    Instruction *I = Malloc->getNextNode();
+    for (; I && I != Memset; I = I->getNextNode()) {
+      for (const Use &Op : I->operands())
+        if (Op.get() == Src)
+          return nullptr;
+    }
+
+    // Reached the end of the block without finding the memset: don't fold.
+    if (!I)
+      return nullptr;
+  }
+
   // Is the inner call really malloc()?
   Function *InnerCallee = Malloc->getCalledFunction();
   if (!InnerCallee)
@@ -883,6 +897,11 @@
   if (auto *Calloc = foldMallocMemset(CI, B, *TLI))
     return Calloc;
 
+  // An llvm.memset intrinsic is already in the form the rewrite below would
+  // produce, so leave it alone.
+  if (isa<IntrinsicInst>(CI))
+    return nullptr;
+
   // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
   Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
   B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
@@ -2219,7 +2238,8 @@
       return optimizeLog(CI, Builder);
     case Intrinsic::sqrt:
       return optimizeSqrt(CI, Builder);
-    // TODO: Use foldMallocMemset() with memset intrinsic.
+    case Intrinsic::memset:
+      return optimizeMemSet(CI, Builder);
     default:
       return nullptr;
     }
Index: test/Transforms/InstCombine/memset-1.ll
===================================================================
--- test/Transforms/InstCombine/memset-1.ll
+++ test/Transforms/InstCombine/memset-1.ll
@@ -31,17 +31,14 @@
   ret i8* %call2
 }
 
-; FIXME: A memset intrinsic should be handled similarly to a memset() libcall.
-
-define i8* @malloc_and_memset_intrinsic(i32 %n) #0 {
-; CHECK-LABEL: @malloc_and_memset_intrinsic(
-; CHECK-NEXT:    [[CALL:%.*]] = call i8* @malloc(i32 [[N:%.*]])
-; CHECK-NEXT:    call void @llvm.memset.p0i8.i32(i8* align 1 [[CALL]], i8 0, i32 [[N]], i1 false)
-; CHECK-NEXT:    ret i8* [[CALL]]
+define i8* @memset_intrinsic_malloc(i32 %n) #0 {
+; CHECK-LABEL: @memset_intrinsic_malloc(
+; CHECK-NEXT:    [[CALLOC:%.*]] = call i8* @calloc(i32 1, i32 [[N:%.*]])
+; CHECK-NEXT:    ret i8* [[CALLOC]]
 ;
   %call = call i8* @malloc(i32 %n)
   call void @llvm.memset.p0i8.i32(i8* %call, i8 0, i32 %n, i32 1, i1 false)
   ret i8* %call
 }
 
 ; This should not create a calloc and should not crash the compiler.