diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -7345,30 +7345,48 @@ checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace()); // Emit a library call. - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext()); - Args.push_back(Entry); - Entry.Node = Src; - Entry.Ty = Src.getValueType().getTypeForEVT(*getContext()); - Args.push_back(Entry); - Entry.Node = Size; - Entry.Ty = getDataLayout().getIntPtrType(*getContext()); - Args.push_back(Entry); + auto &Ctx = *getContext(); + const auto& DL = getDataLayout(); - // FIXME: pass in SDLoc TargetLowering::CallLoweringInfo CLI(*this); - CLI.setDebugLoc(dl) - .setChain(Chain) - .setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), - Dst.getValueType().getTypeForEVT(*getContext()), - getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy(getDataLayout())), - std::move(Args)) - .setDiscardResult() - .setTailCall(isTailCall); + // FIXME: pass in SDLoc + CLI.setDebugLoc(dl).setChain(Chain); + + ConstantSDNode *ConstantSrc = dyn_cast<ConstantSDNode>(Src); + const bool SrcIsZero = ConstantSrc && ConstantSrc->isZero(); + const char *BzeroName = getTargetLoweringInfo().getLibcallName(RTLIB::BZERO); + + // Helper function to create an Entry from Node and Type. + const auto CreateEntry = [](SDValue Node, Type *Ty) { + TargetLowering::ArgListEntry Entry; + Entry.Node = Node; + Entry.Ty = Ty; + return Entry; + }; - std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); + // If zeroing out and bzero is present, use it. 
+ if (SrcIsZero && BzeroName) { + TargetLowering::ArgListTy Args; + Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx))); + Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); + CLI.setLibCallee( + TLI->getLibcallCallingConv(RTLIB::BZERO), Type::getVoidTy(Ctx), + getExternalSymbol(BzeroName, TLI->getPointerTy(DL)), std::move(Args)); + } else { + TargetLowering::ArgListTy Args; + Args.push_back(CreateEntry(Dst, Type::getInt8PtrTy(Ctx))); + Args.push_back(CreateEntry(Src, Src.getValueType().getTypeForEVT(Ctx))); + Args.push_back(CreateEntry(Size, DL.getIntPtrType(Ctx))); + CLI.setLibCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), + Dst.getValueType().getTypeForEVT(Ctx), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy(DL)), + std::move(Args)); + } + + CLI.setDiscardResult().setTailCall(isTailCall); + + std::pair<SDValue, SDValue> CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } diff --git a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -100,38 +100,6 @@ return EmitMOPS(AArch64ISD::MOPS_MEMSET, DAG, dl, Chain, Dst, Src, Size, Alignment, isVolatile, DstPtrInfo, MachinePointerInfo{}); } - - // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); - ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size); - const char *bzeroName = - (V && V->isZero()) - ? DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) - : nullptr; - // For small size (< 256), it is not beneficial to use bzero - // instead of memset. 
- if (bzeroName && (!SizeValue || SizeValue->getZExtValue() > 256)) { - const AArch64TargetLowering &TLI = *STI.getTargetLowering(); - - EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); - Type *IntPtrTy = Type::getInt8PtrTy(*DAG.getContext()); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - Entry.Node = Size; - Args.push_back(Entry); - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(Chain) - .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(bzeroName, IntPtr), - std::move(Args)) - .setDiscardResult(); - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - return CallResult.second; - } return SDValue(); } diff --git a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp --- a/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -67,40 +67,8 @@ // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. if (Alignment < Align(4) || !ConstantSize || - ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) { - // Check to see if there is a specialized entry-point for memory zeroing. - ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Val); - - if (const char *bzeroName = - (ValC && ValC->isZero()) - ? 
DAG.getTargetLoweringInfo().getLibcallName(RTLIB::BZERO) - : nullptr) { - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); - Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - Entry.Node = Dst; - Entry.Ty = IntPtrTy; - Args.push_back(Entry); - Entry.Node = Size; - Args.push_back(Entry); - - TargetLowering::CallLoweringInfo CLI(DAG); - CLI.setDebugLoc(dl) - .setChain(Chain) - .setLibCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(bzeroName, IntPtr), - std::move(Args)) - .setDiscardResult(); - - std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); - return CallResult.second; - } - - // Otherwise have the target-independent code call memset. + ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) return SDValue(); - } uint64_t SizeVal = ConstantSize->getZExtValue(); SDValue InFlag; diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll b/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll --- a/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-to-bzero.ll @@ -1,48 +1,33 @@ -; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-apple-darwin -o - \ -; RUN: | FileCheck --check-prefixes=CHECK,CHECK-DARWIN %s -; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-linux-gnu -o - | \ -; RUN: FileCheck --check-prefixes=CHECK,CHECK-LINUX %s +; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-apple-darwin -o - | FileCheck %s --check-prefixes=CHECK,DARWIN +; RUN: llc %s -enable-machine-outliner=never -mtriple=arm64-linux-gnu -o - | FileCheck %s --check-prefixes=CHECK,LINUX ; ARM64: Calls to bzero() replaced with calls to memset() +declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) + ; CHECK-LABEL: fct1: -; For small size (<= 256), we do not change memset to bzero. 
-; CHECK-DARWIN: {{b|bl}} _memset -; CHECK-LINUX: {{b|bl}} memset +; Constant size memset to zero. +; DARWIN: {{b|bl}} _bzero +; LINUX: {{b|bl}} memset define void @fct1(i8* nocapture %ptr) minsize { -entry: tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 256, i1 false) ret void } -declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) - -; CHECK-LABEL: fct2: -; When the size is bigger than 256, change into bzero. -; CHECK-DARWIN: {{b|bl}} _bzero -; CHECK-LINUX: {{b|bl}} memset -define void @fct2(i8* nocapture %ptr) minsize { -entry: - tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 257, i1 false) - ret void -} - ; CHECK-LABEL: fct3: -; For unknown size, change to bzero. -; CHECK-DARWIN: {{b|bl}} _bzero -; CHECK-LINUX: {{b|bl}} memset +; Variable size memset to zero. +; DARWIN: {{b|bl}} _bzero +; LINUX: {{b|bl}} memset define void @fct3(i8* nocapture %ptr, i32 %unknown) minsize { -entry: %conv = sext i32 %unknown to i64 tail call void @llvm.memset.p0i8.i64(i8* %ptr, i8 0, i64 %conv, i1 false) ret void } ; CHECK-LABEL: fct4: -; Size <= 256, no change. -; CHECK-DARWIN: {{b|bl}} _memset -; CHECK-LINUX: {{b|bl}} memset +; Constant size checked memset to zero. +; DARWIN: {{b|bl}} _bzero +; LINUX: {{b|bl}} memset define void @fct4(i8* %ptr) minsize { -entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 256, i64 %tmp) ret void @@ -52,23 +37,11 @@ declare i64 @llvm.objectsize.i64(i8*, i1) -; CHECK-LABEL: fct5: -; Size > 256, change. -; CHECK-DARWIN: {{b|bl}} _bzero -; CHECK-LINUX: {{b|bl}} memset -define void @fct5(i8* %ptr) minsize { -entry: - %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) - %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 257, i64 %tmp) - ret void -} - ; CHECK-LABEL: fct6: ; Size = unknown, change. 
-; CHECK-DARWIN: {{b|bl}} _bzero -; CHECK-LINUX: {{b|bl}} memset +; DARWIN: {{b|bl}} _bzero +; LINUX: {{b|bl}} memset define void @fct6(i8* %ptr, i32 %unknown) minsize { -entry: %conv = sext i32 %unknown to i64 %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 0, i64 %conv, i64 %tmp) @@ -80,10 +53,9 @@ ; CHECK-LABEL: fct7: ; memset with something that is not a zero, no change. -; CHECK-DARWIN: {{b|bl}} _memset -; CHECK-LINUX: {{b|bl}} memset +; DARWIN: {{b|bl}} _memset +; LINUX: {{b|bl}} memset define void @fct7(i8* %ptr) minsize { -entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 256, i64 %tmp) ret void @@ -91,10 +63,9 @@ ; CHECK-LABEL: fct8: ; memset with something that is not a zero, no change. -; CHECK-DARWIN: {{b|bl}} _memset -; CHECK-LINUX: {{b|bl}} memset +; DARWIN: {{b|bl}} _memset +; LINUX: {{b|bl}} memset define void @fct8(i8* %ptr) minsize { -entry: %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 257, i64 %tmp) ret void @@ -102,10 +73,9 @@ ; CHECK-LABEL: fct9: ; memset with something that is not a zero, no change. 
-; CHECK-DARWIN: {{b|bl}} _memset -; CHECK-LINUX: {{b|bl}} memset +; DARWIN: {{b|bl}} _memset +; LINUX: {{b|bl}} memset define void @fct9(i8* %ptr, i32 %unknown) minsize { -entry: %conv = sext i32 %unknown to i64 %tmp = tail call i64 @llvm.objectsize.i64(i8* %ptr, i1 false) %call = tail call i8* @__memset_chk(i8* %ptr, i32 1, i64 %conv, i64 %tmp) diff --git a/llvm/test/CodeGen/AArch64/arm64_32.ll b/llvm/test/CodeGen/AArch64/arm64_32.ll --- a/llvm/test/CodeGen/AArch64/arm64_32.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32.ll @@ -732,11 +732,26 @@ ret { [18 x i8] }* %tmp0 } +define void @test_memset(i64 %in, i8 %value) { +; CHECK-LABEL: test_memset: +; CHECK-DAG: and x8, x0, #0xffffffff +; CHECK-DAG: lsr x2, x0, #32 +; CHECK-DAG: mov x0, x8 +; CHECK: b _memset + + %ptr.i32 = trunc i64 %in to i32 + %size.64 = lshr i64 %in, 32 + %size = trunc i64 %size.64 to i32 + %ptr = inttoptr i32 %ptr.i32 to i8* + tail call void @llvm.memset.p0i8.i32(i8* align 4 %ptr, i8 %value, i32 %size, i1 false) + ret void +} + define void @test_bzero(i64 %in) { ; CHECK-LABEL: test_bzero: ; CHECK-DAG: lsr x1, x0, #32 ; CHECK-DAG: and x0, x0, #0xffffffff -; CHECK: bl _bzero +; CHECK: b _bzero %ptr.i32 = trunc i64 %in to i32 %size.64 = lshr i64 %in, 32