Index: include/llvm/Transforms/Utils/SimplifyLibCalls.h
===================================================================
--- include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -167,9 +167,10 @@
   Value *optimizeMemCmp(CallInst *CI, IRBuilder<> &B);
   Value *optimizeBCmp(CallInst *CI, IRBuilder<> &B);
   Value *optimizeMemCmpBCmpCommon(CallInst *CI, IRBuilder<> &B);
-  Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B);
-  Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B);
-  Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B);
+  Value *optimizeMemCpy(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
+  Value *optimizeMemMove(CallInst *CI, IRBuilder<> &B,
+                         bool isIntrinsic = false);
+  Value *optimizeMemSet(CallInst *CI, IRBuilder<> &B, bool isIntrinsic = false);
   Value *optimizeRealloc(CallInst *CI, IRBuilder<> &B);
   Value *optimizeWcslen(CallInst *CI, IRBuilder<> &B);
   // Wrapper for all String/Memory Library Call Optimizations
Index: lib/Transforms/Utils/SimplifyLibCalls.cpp
===================================================================
--- lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -186,6 +186,29 @@
   return true;
 }
 
+/// Mark the pointer arguments \p ArgNos of \p CI as dereferenceable for at
+/// least \p DerefBytes bytes. An existing, larger dereferenceable or
+/// dereferenceable_or_null annotation is left untouched.
+static void annotateDereferenceableBytes(CallInst *CI,
+                                         ArrayRef<unsigned> ArgNos,
+                                         uint64_t DerefBytes) {
+  for (unsigned ArgNo : ArgNos) {
+    // The byte-count getters take an attribute index, not an argument number.
+    unsigned AttrIdx = ArgNo + AttributeList::FirstArgIndex;
+    if (CI->getDereferenceableBytes(AttrIdx) >= DerefBytes)
+      continue;
+
+    CI->removeParamAttr(ArgNo, Attribute::Dereferenceable);
+    // Only drop dereferenceable_or_null when the new attribute subsumes it;
+    // a larger byte count there is information we must not throw away.
+    if (CI->getDereferenceableOrNullBytes(AttrIdx) <= DerefBytes)
+      CI->removeParamAttr(ArgNo, Attribute::DereferenceableOrNull);
+    // TODO: CallSite does not have an `addParamAttr` for integer attributes.
+    CI->addParamAttr(ArgNo, Attribute::getWithDereferenceableBytes(
+                                CI->getContext(), DerefBytes));
+  }
+}
+
 //===----------------------------------------------------------------------===//
 // String and Memory Library Call Optimizations
 //===----------------------------------------------------------------------===//
@@ -765,9 +788,14 @@
   ConstantInt *LenC = dyn_cast<ConstantInt>(CI->getArgOperand(2));
 
   // memchr(x, y, 0) -> null
-  if (LenC && LenC->isZero())
-    return Constant::getNullValue(CI->getType());
+  if (LenC) {
+    // NOTE(review): C2x lets memchr stop reading at the first match, so all
+    // LenC bytes may not be dereferenceable -- confirm this is sound.
+    annotateDereferenceableBytes(CI, {0}, LenC->getZExtValue());
+    if (LenC->isZero())
+      return Constant::getNullValue(CI->getType());
+  }
 
   // From now on we need at least constant length and string.
   StringRef Str;
   if (!LenC || !getConstantStringInfo(SrcStr, Str, 0, /*TrimAtNul=*/false))
@@ -926,10 +954,12 @@
     return Constant::getNullValue(CI->getType());
 
   // Handle constant lengths.
-  if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
+  if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size)) {
+    annotateDereferenceableBytes(CI, {0, 1}, LenC->getZExtValue());
     if (Value *Res = optimizeMemCmpConstantSize(CI, LHS, RHS,
                                                 LenC->getZExtValue(), B, DL))
       return Res;
+  }
 
   return nullptr;
 }
@@ -955,18 +985,42 @@
   return optimizeMemCmpBCmpCommon(CI, B);
 }
 
-Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) {
+/// Mark the pointer operands of a constant-length memcpy as dereferenceable.
+/// If \p isIntrinsic is set, \p CI is expected to already be llvm.memcpy and
+/// nullptr is returned; otherwise the library call is lowered to the intrinsic
+/// and its destination operand is returned.
+Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B,
+                                         bool isIntrinsic) {
+  Value *Op0 = CI->getArgOperand(0), *Op1 = CI->getArgOperand(1);
+  Value *Size = CI->getArgOperand(2);
+  if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
+    annotateDereferenceableBytes(CI, {0, 1}, LenC->getZExtValue());
+
+  if (isIntrinsic)
+    return nullptr;
+
   // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n)
-  B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
-                 CI->getArgOperand(2));
-  return CI->getArgOperand(0);
+  B.CreateMemCpy(Op0, 1, Op1, 1, Size);
+  return Op0;
 }
 
-Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) {
+/// Mark the pointer operands of a constant-length memmove as dereferenceable.
+/// If \p isIntrinsic is set, \p CI is expected to already be llvm.memmove and
+/// nullptr is returned; otherwise the library call is lowered to the intrinsic
+/// and its destination operand is returned.
+Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B,
+                                          bool isIntrinsic) {
+  Value *Op0 = CI->getArgOperand(0), *Op1 = CI->getArgOperand(1);
+  Value *Size = CI->getArgOperand(2);
+  if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
+    annotateDereferenceableBytes(CI, {0, 1}, LenC->getZExtValue());
+
+  if (isIntrinsic)
+    return nullptr;
+
   // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n)
-  B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,
-                  CI->getArgOperand(2));
-  return CI->getArgOperand(0);
+  B.CreateMemMove(Op0, 1, Op1, 1, Size);
+  return Op0;
 }
 
 /// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).
@@ -1015,14 +1069,27 @@
   return Calloc;
 }
 
-Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {
+/// Mark the destination of a constant-length memset as dereferenceable.
+/// If \p isIntrinsic is set, \p CI is expected to already be llvm.memset and
+/// nullptr is returned; otherwise the library call is folded into calloc or
+/// lowered to the intrinsic.
+Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B,
+                                         bool isIntrinsic) {
+  Value *Op0 = CI->getArgOperand(0), *Op1 = CI->getArgOperand(1);
+  Value *Size = CI->getArgOperand(2);
+  if (ConstantInt *LenC = dyn_cast<ConstantInt>(Size))
+    annotateDereferenceableBytes(CI, {0}, LenC->getZExtValue());
+
+  if (isIntrinsic)
+    return nullptr;
+
   if (auto *Calloc = foldMallocMemset(CI, B))
     return Calloc;
 
   // memset(p, v, n) -> llvm.memset(align 1 p, v, n)
-  Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);
-  B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);
-  return CI->getArgOperand(0);
+  Value *Val = B.CreateIntCast(Op1, B.getInt8Ty(), false);
+  B.CreateMemSet(Op0, Val, Size, 1);
+  return Op0;
 }
 
 Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) {
@@ -2710,6 +2777,12 @@
     case Intrinsic::sqrt:
       return optimizeSqrt(CI, Builder);
     // TODO: Use foldMallocMemset() with memset intrinsic.
+    case Intrinsic::memset:
+      return optimizeMemSet(CI, Builder, /*isIntrinsic=*/true);
+    case Intrinsic::memcpy:
+      return optimizeMemCpy(CI, Builder, /*isIntrinsic=*/true);
+    case Intrinsic::memmove:
+      return optimizeMemMove(CI, Builder, /*isIntrinsic=*/true);
     default:
       return nullptr;
     }
Index: test/Transforms/InstCombine/mem-deref-bytes.ll
===================================================================
--- test/Transforms/InstCombine/mem-deref-bytes.ll
+++ test/Transforms/InstCombine/mem-deref-bytes.ll
@@ -0,0 +1,128 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare i32 @memcmp(i8* nocapture, i8* nocapture, i64)
+declare i8* @memcpy(i8* nocapture, i8* nocapture, i64)
+declare i8* @memmove(i8* nocapture, i8* nocapture, i64)
+declare i8* @memset(i8* nocapture, i8, i64)
+declare i8* @memchr(i8* nocapture, i32, i64)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1)
+declare void @llvm.memmove.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i1)
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
+
+define i32 @memcmp_const_size_set_deref(i8* nocapture readonly %d, i8* nocapture readonly %s) {
+; CHECK-LABEL: @memcmp_const_size_set_deref(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* dereferenceable(16) [[D:%.*]], i8* dereferenceable(16) [[S:%.*]], i64 16)
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %d, i8* %s, i64 16)
+  ret i32 %call
+}
+
+define i32 @memcmp_const_size_update_deref(i8* nocapture readonly %d, i8* nocapture readonly %s) {
+; CHECK-LABEL: @memcmp_const_size_update_deref(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* dereferenceable(16) [[D:%.*]], i8* dereferenceable(16) [[S:%.*]], i64 16)
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* dereferenceable(4) %d, i8* dereferenceable(8) %s, i64 16)
+  ret i32 %call
+}
+
+define i32 @memcmp_const_size_update_deref2(i8* nocapture readonly %d, i8* nocapture readonly %s) {
+; CHECK-LABEL: @memcmp_const_size_update_deref2(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* dereferenceable(16) [[D:%.*]], i8* dereferenceable(16) [[S:%.*]], i64 16)
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %d, i8* dereferenceable_or_null(8) %s, i64 16)
+  ret i32 %call
+}
+
+define i32 @memcmp_const_size_no_update_deref(i8* nocapture readonly %d, i8* nocapture readonly %s) {
+; CHECK-LABEL: @memcmp_const_size_no_update_deref(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* dereferenceable(40) [[D:%.*]], i8* dereferenceable(16) [[S:%.*]], i64 16)
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* dereferenceable(40) %d, i8* %s, i64 16)
+  ret i32 %call
+}
+
+define i32 @memcmp_const_size_no_update_deref2(i8* nocapture readonly %d, i8* nocapture readonly %s) {
+; CHECK-LABEL: @memcmp_const_size_no_update_deref2(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* dereferenceable(16) dereferenceable_or_null(40) [[D:%.*]], i8* dereferenceable(16) [[S:%.*]], i64 16)
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* dereferenceable_or_null(40) %d, i8* %s, i64 16)
+  ret i32 %call
+}
+
+define i32 @memcmp_nonconst_size(i8* nocapture readonly %d, i8* nocapture readonly %s, i64 %n) {
+; CHECK-LABEL: @memcmp_nonconst_size(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i32 @memcmp(i8* [[D:%.*]], i8* [[S:%.*]], i64 [[N:%.*]])
+; CHECK-NEXT:    ret i32 [[CALL]]
+;
+  %call = tail call i32 @memcmp(i8* %d, i8* %s, i64 %n)
+  ret i32 %call
+}
+
+define i8* @memcpy_const_size_set_deref(i8* nocapture readonly %d, i8* nocapture readonly %s) {
+; CHECK-LABEL: @memcpy_const_size_set_deref(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(64) [[D:%.*]], i8* align 1 dereferenceable(64) [[S:%.*]], i64 64, i1 false)
+; CHECK-NEXT:    ret i8* [[D]]
+;
+  %call = tail call i8* @memcpy(i8* %d, i8* %s, i64 64)
+  ret i8* %call
+}
+
+define i8* @memmove_const_size_set_deref(i8* nocapture readonly %d, i8* nocapture readonly %s) {
+; CHECK-LABEL: @memmove_const_size_set_deref(
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 dereferenceable(64) [[D:%.*]], i8* align 1 dereferenceable(64) [[S:%.*]], i64 64, i1 false)
+; CHECK-NEXT:    ret i8* [[D]]
+;
+  %call = tail call i8* @memmove(i8* %d, i8* %s, i64 64)
+  ret i8* %call
+}
+
+define i8* @memset_const_size_set_deref(i8* nocapture readonly %s, i8 %c) {
+; CHECK-LABEL: @memset_const_size_set_deref(
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 1 dereferenceable(64) [[S:%.*]], i8 [[C:%.*]], i64 64, i1 false)
+; CHECK-NEXT:    ret i8* [[S]]
+;
+  %call = tail call i8* @memset(i8* %s, i8 %c, i64 64)
+  ret i8* %call
+}
+
+define i8* @memchr_const_size_set_deref(i8* nocapture readonly %s, i32 %c) {
+; CHECK-LABEL: @memchr_const_size_set_deref(
+; CHECK-NEXT:    [[CALL:%.*]] = tail call i8* @memchr(i8* dereferenceable(64) [[S:%.*]], i32 [[C:%.*]], i64 64)
+; CHECK-NEXT:    ret i8* [[CALL]]
+;
+  %call = tail call i8* @memchr(i8* %s, i32 %c, i64 64)
+  ret i8* %call
+}
+
+define i8* @llvm_memcpy_const_size_set_deref(i8* nocapture readonly %d, i8* nocapture readonly %s) {
+; CHECK-LABEL: @llvm_memcpy_const_size_set_deref(
+; CHECK-NEXT:    call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 dereferenceable(16) [[D:%.*]], i8* align 1 dereferenceable(16) [[S:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    ret i8* [[D]]
+;
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 %d, i8* align 1 %s, i64 16, i1 false)
+  ret i8* %d
+}
+
+define i8* @llvm_memmove_const_size_set_deref(i8* nocapture readonly %d, i8* nocapture readonly %s) {
+; CHECK-LABEL: @llvm_memmove_const_size_set_deref(
+; CHECK-NEXT:    call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 dereferenceable(16) [[D:%.*]], i8* align 1 dereferenceable(16) [[S:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    ret i8* [[D]]
+;
+  call void @llvm.memmove.p0i8.p0i8.i64(i8* align 1 %d, i8* align 1 %s, i64 16, i1 false)
+  ret i8* %d
+}
+
+define i8* @llvm_memset_const_size_set_deref(i8* nocapture readonly %s, i8 %c) {
+; CHECK-LABEL: @llvm_memset_const_size_set_deref(
+; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* align 1 dereferenceable(16) [[S:%.*]], i8 [[C:%.*]], i64 16, i1 false)
+; CHECK-NEXT:    ret i8* [[S]]
+;
+  call void @llvm.memset.p0i8.i64(i8* align 1 %s, i8 %c, i64 16, i1 false)
+  ret i8* %s
+}