Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6174,7 +6174,7 @@ LibFunc::Func Func; if (!F->hasLocalLinkage() && F->hasName() && LibInfo->getLibFunc(F->getName(), Func) && - LibInfo->hasOptimizedCodeGen(Func)) { + LibInfo->hasOptimizedCodeGen(Func) && !I.isNoBuiltin()) { switch (Func) { default: break; case LibFunc::copysign: Index: lib/Transforms/Instrumentation/AddressSanitizer.cpp =================================================================== --- lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -475,6 +475,7 @@ void instrumentMemIntrinsic(MemIntrinsic *MI); Value *memToShadow(Value *Shadow, IRBuilder<> &IRB); bool runOnFunction(Function &F) override; + void maybeMarkCallNoInline(CallInst *CI, const TargetLibraryInfo *TLI); bool maybeInsertAsanInitAtFunctionEntry(Function &F); void markEscapedLocalAllocas(Function &F); bool doInitialization(Module &M) override; @@ -1751,6 +1752,8 @@ bool IsWrite; unsigned Alignment; uint64_t TypeSize; + const TargetLibraryInfo *TLI = + &getAnalysis().getTLI(); // Fill the set of memory operations to instrument. 
for (auto &BB : F) { @@ -1779,6 +1782,9 @@ TempsToInstrument.clear(); if (CS.doesNotReturn()) NoReturnCalls.push_back(CS.getInstruction()); } + if (CallInst *CI = dyn_cast(&Inst)) { + maybeMarkCallNoInline(CI, TLI); + } continue; } ToInstrument.push_back(&Inst); @@ -1791,8 +1797,6 @@ CompileKernel || (ClInstrumentationWithCallsThreshold >= 0 && ToInstrument.size() > (unsigned)ClInstrumentationWithCallsThreshold); - const TargetLibraryInfo *TLI = - &getAnalysis().getTLI(); const DataLayout &DL = F.getParent()->getDataLayout(); ObjectSizeOffsetVisitor ObjSizeVis(DL, TLI, F.getContext(), /*RoundToAlign=*/true); @@ -1833,6 +1837,31 @@ return res; } +// CodeGen has special handling for some string functions that may replace +// them with target-specific intrinsics. Since that'd skip our interceptors, +// and thus make us miss some memory accesses, we mark affected calls +// as NoBuiltin, which will disable optimization in CodeGen. +void AddressSanitizer::maybeMarkCallNoInline(CallInst *CI, + const TargetLibraryInfo *TLI) { + Function *F = CI->getCalledFunction(); + LibFunc::Func Func; + if (!F || F->hasLocalLinkage() || !F->hasName() || + !TLI->getLibFunc(F->getName(), Func)) + return; + switch (Func) { + default: break; + case LibFunc::memcmp: + case LibFunc::memchr: + case LibFunc::strcpy: + case LibFunc::stpcpy: + case LibFunc::strcmp: + case LibFunc::strlen: + case LibFunc::strnlen: + CI->addAttribute(AttributeSet::FunctionIndex, Attribute::NoBuiltin); + break; + } +} + // Workaround for bug 11395: we don't want to instrument stack in functions // with large assembly blobs (32-bit only), otherwise reg alloc may crash. // FIXME: remove once the bug 11395 is fixed. Index: test/CodeGen/SystemZ/memchr-03.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/memchr-03.ll @@ -0,0 +1,16 @@ +; Test that memchr won't be converted to SRST if calls are +; marked with nobuiltin, eg. for sanitizers. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -verify-machineinstrs | FileCheck %s + +declare i8 *@memchr(i8 *%src, i16 %char, i32 %len) + +; Test a simple forwarded call. +define i8 *@f1(i8 *%src, i16 %char, i32 %len) { +; CHECK-LABEL: f1: +; CHECK-NOT: srst +; CHECK: brasl %r14, memchr +; CHECK: br %r14 + %res = call i8 *@memchr(i8 *%src, i16 %char, i32 %len) nobuiltin + ret i8 *%res +} Index: test/CodeGen/SystemZ/memcmp-03.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/memcmp-03.ll @@ -0,0 +1,191 @@ +; Test that memcmp won't be converted to CLC if calls are +; marked with nobuiltin, eg. for sanitizers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size) + +; Zero-length comparisons must still call memcmp when marked nobuiltin. +define i32 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0) nobuiltin + ret i32 %res +} + +; Check a case where the result is used as an integer. +define i32 @f2(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f2: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2) nobuiltin + ret i32 %res +} + +; Check a case where the result is tested for equality. +define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3) nobuiltin + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for inequality. 
+define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f4: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4) nobuiltin + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested via slt. +define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f5: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5) nobuiltin + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for sgt. +define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f6: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6) nobuiltin + %cmp = icmp sgt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check the upper end of the CLC range. Here the result is used both as +; an integer and for branching. +define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f7: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256) nobuiltin + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; 257 bytes needs two CLCs. +define i32 @f8(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f8: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257) nobuiltin + ret i32 %res +} + +; Test a comparison of 257 bytes in which the CC result can be used directly. 
+define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f9: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257) nobuiltin + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Test the largest size that can use two CLCs. +define i32 @f10(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f10: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 512) nobuiltin + ret i32 %res +} + +; Test the smallest size that needs 3 CLCs. +define i32 @f11(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f11: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 513) nobuiltin + ret i32 %res +} + +; Test the largest size that can use 3 CLCs. +define i32 @f12(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f12: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 768) nobuiltin + ret i32 %res +} + +; The next size up uses a loop instead. We leave the more complicated +; loop tests to memcpy-01.ll, which shares the same form. +define i32 @f13(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f13: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769) nobuiltin + ret i32 %res +} Index: test/CodeGen/SystemZ/strcmp-03.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/strcmp-03.ll @@ -0,0 +1,54 @@ +; Test that strcmp won't be converted to CLST if calls are +; marked with nobuiltin, eg. for sanitizers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare signext i32 @strcmp(i8 *%src1, i8 *%src2) + +; Check a case where the result is used as an integer. 
+define i32 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK-NOT: clst +; CHECK: brasl %r14, strcmp +; CHECK: br %r14 + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) nobuiltin + ret i32 %res +} + +; Check a case where the result is tested for equality. +define void @f2(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f2: +; CHECK-NOT: clst +; CHECK: brasl %r14, strcmp +; CHECK: br %r14 + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) nobuiltin + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Test a case where the result is used both as an integer and for +; branching. +define i32 @f3(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK-NOT: clst +; CHECK: brasl %r14, strcmp +; CHECK: br %r14 +entry: + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) nobuiltin + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret i32 %res +} Index: test/CodeGen/SystemZ/strcpy-02.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/strcpy-02.ll @@ -0,0 +1,42 @@ +; Test that strcpy/stpcpy won't be converted to MVST if calls are +; marked with nobuiltin, eg. for sanitizers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@strcpy(i8 *%dest, i8 *%src) +declare i8 *@stpcpy(i8 *%dest, i8 *%src) + +; Check strcpy. +define i8 *@f1(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f1: +; CHECK-NOT: mvst +; CHECK: brasl %r14, strcpy +; CHECK: br %r14 + %res = call i8 *@strcpy(i8 *%dest, i8 *%src) nobuiltin + ret i8 *%res +} + +; Check stpcpy. +define i8 *@f2(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f2: +; CHECK-NOT: mvst +; CHECK: brasl %r14, stpcpy +; CHECK: br %r14 + %res = call i8 *@stpcpy(i8 *%dest, i8 *%src) nobuiltin + ret i8 *%res +} + +; Check correct operation with other loads and stores. 
The load must +; come before the call and the store afterwards. +define i32 @f3(i32 %dummy, i8 *%dest, i8 *%src, i32 *%resptr, i32 *%storeptr) { +; CHECK-LABEL: f3: +; CHECK-DAG: l [[REG1:%r[0-9]+]], 0(%r5) +; CHECK-NOT: mvst +; CHECK: brasl %r14, strcpy +; CHECK: mvhi 0(%r6), 0 +; CHECK: br %r14 + %res = load i32 , i32 *%resptr + %unused = call i8 *@strcpy(i8 *%dest, i8 *%src) nobuiltin + store i32 0, i32 *%storeptr + ret i32 %res +} Index: test/CodeGen/SystemZ/strlen-03.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/strlen-03.ll @@ -0,0 +1,25 @@ +; Test that strlen/strnlen won't be converted to SRST if calls are +; marked with nobuiltin, eg. for sanitizers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @strlen(i8 *%src) +declare i64 @strnlen(i8 *%src, i64 %len) + +define i64 @f1(i32 %dummy, i8 *%src) { +; CHECK-LABEL: f1: +; CHECK: brasl %r14, strlen +; CHECK: br %r14 + %res = call i64 @strlen(i8 *%src) nobuiltin + ret i64 %res +} + +; Likewise for strnlen. +define i64 @f2(i64 %len, i8 *%src) { +; CHECK-LABEL: f2: +; CHECK-NOT: srst +; CHECK: brasl %r14, strnlen +; CHECK: br %r14 + %res = call i64 @strnlen(i8 *%src, i64 %len) nobuiltin + ret i64 %res +} Index: test/Instrumentation/AddressSanitizer/str-nobuiltin.ll =================================================================== --- /dev/null +++ test/Instrumentation/AddressSanitizer/str-nobuiltin.ll @@ -0,0 +1,33 @@ +; Test marking string functions as nobuiltin in address sanitizer. 
+; +; RUN: opt < %s -asan -asan-module -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +declare i8* @memchr(i8* %a, i32 %b, i64 %c) +declare i32 @memcmp(i8* %a, i8* %b, i64 %c) +declare i32 @strcmp(i8* %a, i8* %b) +declare i8* @strcpy(i8* %a, i8* %b) +declare i8* @stpcpy(i8* %a, i8* %b) +declare i64 @strlen(i8* %a) +declare i64 @strnlen(i8* %a, i64 %b) + +; CHECK: call{{.*}}@memchr{{.*}} #[[ATTR:[0-9]+]] +; CHECK: call{{.*}}@memcmp{{.*}} #[[ATTR]] +; CHECK: call{{.*}}@strcmp{{.*}} #[[ATTR]] +; CHECK: call{{.*}}@strcpy{{.*}} #[[ATTR]] +; CHECK: call{{.*}}@stpcpy{{.*}} #[[ATTR]] +; CHECK: call{{.*}}@strlen{{.*}} #[[ATTR]] +; CHECK: call{{.*}}@strnlen{{.*}} #[[ATTR]] +; attributes #[[ATTR]] = { nobuiltin } + +define void @f1(i8* %a, i8* %b) nounwind uwtable sanitize_address { + tail call i8* @memchr(i8* %a, i32 1, i64 12) + tail call i32 @memcmp(i8* %a, i8* %b, i64 12) + tail call i32 @strcmp(i8* %a, i8* %b) + tail call i8* @strcpy(i8* %a, i8* %b) + tail call i8* @stpcpy(i8* %a, i8* %b) + tail call i64 @strlen(i8* %a) + tail call i64 @strnlen(i8* %a, i64 12) + ret void +}