Index: clang/include/clang/Basic/Builtins.def =================================================================== --- clang/include/clang/Basic/Builtins.def +++ clang/include/clang/Basic/Builtins.def @@ -547,6 +547,7 @@ BUILTIN(__builtin_malloc, "v*z", "nF") BUILTIN(__builtin_memchr, "v*vC*iz", "nF") BUILTIN(__builtin_memcmp, "ivC*vC*z", "nF") +BUILTIN(__builtin_memcmp_inline, "ivC*vC*Iz", "nF") BUILTIN(__builtin_memcpy, "v*v*vC*z", "nF") BUILTIN(__builtin_memcpy_inline, "vv*vC*Iz", "nt") BUILTIN(__builtin_memmove, "v*v*vC*z", "nF") Index: clang/lib/CodeGen/CGBuilder.h =================================================================== --- clang/lib/CodeGen/CGBuilder.h +++ clang/lib/CodeGen/CGBuilder.h @@ -302,6 +302,13 @@ Src.getAlignment().getAsAlign(), getInt64(Size)); } + using CGBuilderBaseTy::CreateMemCmpInline; + llvm::CallInst *CreateMemCmpInline(Address Lhs, Address Rhs, uint64_t Size, + llvm::Type *RetType) { + return CreateMemCmpInline( + Lhs.getPointer(), Rhs.getPointer(), getInt64(Size), RetType); + } + using CGBuilderBaseTy::CreateMemMove; llvm::CallInst *CreateMemMove(Address Dest, Address Src, llvm::Value *Size, bool IsVolatile = false) { Index: clang/lib/CodeGen/CGBuiltin.cpp =================================================================== --- clang/lib/CodeGen/CGBuiltin.cpp +++ clang/lib/CodeGen/CGBuiltin.cpp @@ -3305,6 +3305,21 @@ return RValue::get(nullptr); } + case Builtin::BI__builtin_memcmp_inline: { + Address Lhs = EmitPointerWithAlignment(E->getArg(0)); + Address Rhs = EmitPointerWithAlignment(E->getArg(1)); + uint64_t Size = + E->getArg(2)->EvaluateKnownConstInt(getContext()).getZExtValue(); + EmitNonNullArgCheck(RValue::get(Lhs.getPointer()), E->getArg(0)->getType(), + E->getArg(0)->getExprLoc(), FD, 0); + EmitNonNullArgCheck(RValue::get(Rhs.getPointer()), E->getArg(1)->getType(), + E->getArg(1)->getExprLoc(), FD, 1); + QualType T = E->getType(); + llvm::IntegerType *RetType = + llvm::IntegerType::get(getLLVMContext(), 
getContext().getTypeSize(T)); + return RValue::get(Builder.CreateMemCmpInline(Lhs, Rhs, Size, RetType)); + } + case Builtin::BI__builtin_char_memchr: BuiltinID = Builtin::BI__builtin_memchr; break; Index: clang/lib/Sema/SemaChecking.cpp =================================================================== --- clang/lib/Sema/SemaChecking.cpp +++ clang/lib/Sema/SemaChecking.cpp @@ -1717,6 +1717,19 @@ } break; } + case Builtin::BI__builtin_memcmp_inline: { + clang::Expr *SizeOp = TheCall->getArg(2); + // We warn about comparing `nullptr` pointers when `size` is greater than 0. + // When `size` is value dependent we cannot evaluate its value so we bail + // out. + if (SizeOp->isValueDependent()) + break; + if (!SizeOp->EvaluateKnownConstInt(Context).isNullValue()) { + CheckNonNullArgument(*this, TheCall->getArg(0), TheCall->getExprLoc()); + CheckNonNullArgument(*this, TheCall->getArg(1), TheCall->getExprLoc()); + } + break; + } #define BUILTIN(ID, TYPE, ATTRS) #define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \ case Builtin::BI##ID: \ Index: llvm/include/llvm/IR/IRBuilder.h =================================================================== --- llvm/include/llvm/IR/IRBuilder.h +++ llvm/include/llvm/IR/IRBuilder.h @@ -649,6 +649,8 @@ CallInst *CreateMemCpyInline(Value *Dst, MaybeAlign DstAlign, Value *Src, MaybeAlign SrcAlign, Value *Size); + CallInst *CreateMemCmpInline(Value *Lhs, Value *Rhs, Value *Size, + Type *RetType); /// Create and insert an element unordered-atomic memcpy between the /// specified pointers. Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -622,6 +622,18 @@ WriteOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, ImmArg<ArgIndex<2>>, ImmArg<ArgIndex<3>>]>; +// Memcmp semantic that is guaranteed to be inlined. +// In particular this means that the generated code is not allowed to call any +// external function. +// The third argument (specifying the size) must be a constant. 
+def int_memcmp_inline + : Intrinsic<[llvm_anyint_ty], + [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty], + [IntrArgMemOnly, IntrWillReturn, IntrNoFree, + NoCapture<ArgIndex<0>>, NoCapture<ArgIndex<1>>, + ReadOnly<ArgIndex<0>>, ReadOnly<ArgIndex<1>>, + ImmArg<ArgIndex<2>>]>; + def int_memmove : Intrinsic<[], [llvm_anyptr_ty, llvm_anyptr_ty, llvm_anyint_ty, llvm_i1_ty], Index: llvm/lib/CodeGen/ExpandMemCmp.cpp =================================================================== --- llvm/lib/CodeGen/ExpandMemCmp.cpp +++ llvm/lib/CodeGen/ExpandMemCmp.cpp @@ -738,11 +738,11 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI, const TargetLowering *TLI, const DataLayout *DL, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI, - DomTreeUpdater *DTU) { + DomTreeUpdater *DTU, bool IsInline) { NumMemCmpCalls++; // Early exit from expansion if -Oz. - if (CI->getFunction()->hasMinSize()) + if (!IsInline && CI->getFunction()->hasMinSize()) return false; // Early exit from expansion if size is not a constant. @@ -860,9 +860,12 @@ continue; } LibFunc Func; - if (TLI->getLibFunc(*CI, Func) && - (Func == LibFunc_memcmp || Func == LibFunc_bcmp) && - expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU)) { + auto *II = dyn_cast<IntrinsicInst>(CI); + bool IsInline = II && II->getIntrinsicID() == Intrinsic::memcmp_inline; + if (((TLI->getLibFunc(*CI, Func) && + (Func == LibFunc_memcmp || Func == LibFunc_bcmp)) || + IsInline) && + expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, IsInline)) { return true; } } Index: llvm/lib/IR/IRBuilder.cpp =================================================================== --- llvm/lib/IR/IRBuilder.cpp +++ llvm/lib/IR/IRBuilder.cpp @@ -227,6 +227,21 @@ return CI; } +CallInst *IRBuilderBase::CreateMemCmpInline(Value *Lhs, Value *Rhs, Value *Size, + Type *RetType) { + Lhs = getCastedInt8PtrValue(Lhs); + Rhs = getCastedInt8PtrValue(Rhs); + + Value *Ops[] = {Lhs, Rhs, Size}; + Type *Tys[] = {RetType, Lhs->getType(), Rhs->getType(), Size->getType()}; + Function *F = BB->getParent(); + Module *M = F->getParent(); + 
Function *TheFn = Intrinsic::getDeclaration(M, Intrinsic::memcmp_inline, Tys); + CallInst *CI = createCallHelper(TheFn, Ops, this); + return CI; +} + CallInst *IRBuilderBase::CreateElementUnorderedAtomicMemCpy( Value *Dst, Align DstAlign, Value *Src, Align SrcAlign, Value *Size, uint32_t ElementSize, MDNode *TBAATag, MDNode *TBAAStructTag, Index: llvm/lib/Transforms/Utils/MemoryOpRemark.cpp =================================================================== --- llvm/lib/Transforms/Utils/MemoryOpRemark.cpp +++ llvm/lib/Transforms/Utils/MemoryOpRemark.cpp @@ -28,6 +28,7 @@ if (auto *II = dyn_cast<IntrinsicInst>(I)) { switch (II->getIntrinsicID()) { + case Intrinsic::memcmp_inline: case Intrinsic::memcpy_inline: case Intrinsic::memcpy: case Intrinsic::memmove: @@ -187,6 +188,10 @@ bool Atomic = false; bool Inline = false; switch (II.getIntrinsicID()) { + case Intrinsic::memcmp_inline: + CallTo = "memcmp"; + Inline = true; + break; case Intrinsic::memcpy_inline: CallTo = "memcpy"; Inline = true; break; @@ -235,6 +240,10 @@ case Intrinsic::memset_element_unordered_atomic: visitPtr(II.getOperand(0), /*IsRead=*/false, *R); break; + case Intrinsic::memcmp_inline: + visitPtr(II.getOperand(1), /*IsRead=*/true, *R); + visitPtr(II.getOperand(0), /*IsRead=*/true, *R); + break; } inlineVolatileOrAtomicWithExtraArgs(&Inline, Volatile, Atomic, *R); ORE.emit(*R);