diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -52,9 +52,9 @@ return true; } -static bool hasBcmp(const Triple &TT) { - // Posix removed support from bcmp() in 2001, but the glibc and several - // implementations of the libc still have it. +static bool hasPosix2001LibCFunctions(const Triple &TT) { + // Posix removed support for some libc functions in 2001 (bcmp, bzero), but + // the glibc and several implementations of the libc still have them. if (TT.isOSLinux()) return TT.isGNUEnvironment() || TT.isMusl(); // Both NetBSD and OpenBSD are planning to remove the function. Windows does @@ -145,8 +145,10 @@ TLI.setUnavailable(LibFunc_sincospif_stret); } - if (!hasBcmp(T)) + if (!hasPosix2001LibCFunctions(T)) { + TLI.setUnavailable(LibFunc_bzero); TLI.setUnavailable(LibFunc_bcmp); + } if (T.isMacOSX() && T.getArch() == Triple::x86 && !T.isMacOSXVersionLT(10, 7)) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -24,6 +24,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/ISDOpcodes.h" #include "llvm/CodeGen/MachineBasicBlock.h" @@ -6377,7 +6378,7 @@ } SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst, - SDValue Src, SDValue Size, unsigned Align, + SDValue ByteValue, SDValue Size, unsigned Align, bool isVol, bool isTailCall, MachinePointerInfo DstPtrInfo) { assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); @@ -6391,8 +6392,8 @@ return Chain; SDValue Result = - getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(), - Align, isVol, 
DstPtrInfo); + getMemsetStores(*this, dl, Chain, Dst, ByteValue, + ConstantSize->getZExtValue(), Align, isVol, DstPtrInfo); if (Result.getNode()) return Result; @@ -6402,39 +6403,55 @@ // code. If the target chooses to do this, this is the next best. if (TSI) { SDValue Result = TSI->EmitTargetCodeForMemset( - *this, dl, Chain, Dst, Src, Size, Align, isVol, DstPtrInfo); + *this, dl, Chain, Dst, ByteValue, Size, Align, isVol, DstPtrInfo); if (Result.getNode()) return Result; } checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); + const auto MakeArg = [](Type *Ty, SDValue Node) { + TargetLowering::ArgListEntry Entry; + Entry.Node = Node; + Entry.Ty = Ty; + return Entry; + }; + // Emit a library call. - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext()); - Args.push_back(Entry); - Entry.Node = Src; - Entry.Ty = Src.getValueType().getTypeForEVT(*getContext()); - Args.push_back(Entry); - Entry.Node = Size; - Entry.Ty = getDataLayout().getIntPtrType(*getContext()); - Args.push_back(Entry); + llvm::LLVMContext &Ctx = *getContext(); + const llvm::DataLayout &DL = getDataLayout(); + ConstantSDNode *ConstantByteValue = dyn_cast(ByteValue); + if (ConstantByteValue && ConstantByteValue->isNullValue() && + LibInfo->has(LibFunc_bzero)) { + TargetLowering::CallLoweringInfo CLI(*this); + CLI.setDebugLoc(dl) + .setChain(Chain) + .setLibCallee(CallingConv::C, Dst.getValueType().getTypeForEVT(Ctx), + getExternalSymbol(LibInfo->getName(LibFunc_bzero).data(), + TLI->getPointerTy(DL)), + {MakeArg(Type::getInt8PtrTy(Ctx), Dst), + MakeArg(DL.getIntPtrType(Ctx), Size)}) + .setDiscardResult(); + + return TLI->LowerCallTo(CLI).second; + } // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); CLI.setDebugLoc(dl) .setChain(Chain) - .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), - Dst.getValueType().getTypeForEVT(*getContext()), - 
getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) + .setLibCallee( + TLI->getLibcallCallingConv(RTLIB::MEMSET), + Dst.getValueType().getTypeForEVT(Ctx), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy(DL)), + {MakeArg(Type::getInt8PtrTy(Ctx), Dst), + MakeArg(ByteValue.getValueType().getTypeForEVT(Ctx), ByteValue), + MakeArg(DL.getIntPtrType(Ctx), Size)}) .setDiscardResult() .setTailCall(isTailCall); - std::pair CallResult = TLI->LowerCallTo(CLI); - return CallResult.second; + return TLI->LowerCallTo(CLI).second; } SDValue SelectionDAG::getAtomicMemset(SDValue Chain, const SDLoc &dl, diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll b/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll --- a/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll @@ -8,7 +8,7 @@ ; For small size (<= 256), we do not change memset to bzero. ; CHECK-DARWIN: {{b|bl}} _memset ; CHECK-LINUX: {{b|bl}} memset -define void @fct1(i8* nocapture %ptr) minsize { +define void @fct1(i8* nocapture %ptr) minsize nounwind { entry: tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i1 false) ret void @@ -20,7 +20,7 @@ ; When the size is bigger than 256, change into bzero. ; CHECK-DARWIN: {{b|bl}} _bzero ; CHECK-LINUX: {{b|bl}} memset -define void @fct2(i8* nocapture %ptr) minsize { +define void @fct2(i8* nocapture %ptr) minsize nounwind { entry: tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i1 false) ret void @@ -30,7 +30,7 @@ ; For unknown size, change to bzero. ; CHECK-DARWIN: {{b|bl}} _bzero ; CHECK-LINUX: {{b|bl}} memset -define void @fct3(i8* nocapture %ptr, i32 %unknown) minsize { +define void @fct3(i8* nocapture %ptr, i32 %unknown) minsize nounwind { entry: %conv = sext i32 %unknown to i64 tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i1 false) @@ -41,7 +41,7 @@ ; Size <= 256, no change. 
; CHECK-DARWIN: {{b|bl}} _memset ; CHECK-LINUX: {{b|bl}} memset -define void @fct4(i8* %ptr) minsize { +define void @fct4(i8* %ptr) minsize nounwind { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 256, i64 %tmp) @@ -56,7 +56,7 @@ ; Size > 256, change. ; CHECK-DARWIN: {{b|bl}} _bzero ; CHECK-LINUX: {{b|bl}} memset -define void @fct5(i8* %ptr) minsize { +define void @fct5(i8* %ptr) minsize nounwind { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 257, i64 %tmp) @@ -67,7 +67,7 @@ ; Size = unknown, change. ; CHECK-DARWIN: {{b|bl}} _bzero ; CHECK-LINUX: {{b|bl}} memset -define void @fct6(i8* %ptr, i32 %unknown) minsize { +define void @fct6(i8* %ptr, i32 %unknown) minsize nounwind { entry: %conv = sext i32 %unknown to i64 %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) @@ -82,7 +82,7 @@ ; memset with something that is not a zero, no change. ; CHECK-DARWIN: {{b|bl}} _memset ; CHECK-LINUX: {{b|bl}} memset -define void @fct7(i8* %ptr) minsize { +define void @fct7(i8* %ptr) minsize nounwind { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 256, i64 %tmp) @@ -93,7 +93,7 @@ ; memset with something that is not a zero, no change. ; CHECK-DARWIN: {{b|bl}} _memset ; CHECK-LINUX: {{b|bl}} memset -define void @fct8(i8* %ptr) minsize { +define void @fct8(i8* %ptr) minsize nounwind { entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 257, i64 %tmp) @@ -104,7 +104,7 @@ ; memset with something that is not a zero, no change. 
; CHECK-DARWIN: {{b|bl}} _memset ; CHECK-LINUX: {{b|bl}} memset -define void @fct9(i8* %ptr, i32 %unknown) minsize { +define void @fct9(i8* %ptr, i32 %unknown) minsize nounwind { entry: %conv = sext i32 %unknown to i64 %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false)