Index: lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6207,9 +6207,10 @@ } // Check for well-known libc/libm calls. If the function is internal, it - // can't be a library call. + // can't be a library call. Don't do the check if marked as nobuiltin for + // some reason. LibFunc::Func Func; - if (!F->hasLocalLinkage() && F->hasName() && + if (!I.isNoBuiltin() && !F->hasLocalLinkage() && F->hasName() && LibInfo->getLibFunc(F->getName(), Func) && LibInfo->hasOptimizedCodeGen(Func)) { switch (Func) { Index: test/CodeGen/SystemZ/memchr-nobuiltin.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/memchr-nobuiltin.ll @@ -0,0 +1,16 @@ +; Test that memchr won't be converted to SRST if calls are +; marked with nobuiltin, e.g. for sanitizers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@memchr(i8 *%src, i16 %char, i32 %len) + +; Test a simple forwarded call. +define i8 *@f1(i8 *%src, i16 %char, i32 %len) { +; CHECK-LABEL: f1: +; CHECK-NOT: srst +; CHECK: brasl %r14, memchr +; CHECK: br %r14 + %res = call i8 *@memchr(i8 *%src, i16 %char, i32 %len) nobuiltin + ret i8 *%res +} Index: test/CodeGen/SystemZ/memcmp-nobuiltin.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/memcmp-nobuiltin.ll @@ -0,0 +1,191 @@ +; Test that memcmp won't be converted to CLC if calls are +; marked with nobuiltin, e.g. for sanitizers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare signext i32 @memcmp(i8 *%src1, i8 *%src2, i64 %size) + +; Even a zero-length comparison must remain a call when marked nobuiltin. 
+define i32 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 0) nobuiltin + ret i32 %res +} + +; Check a case where the result is used as an integer. +define i32 @f2(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f2: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 2) nobuiltin + ret i32 %res +} + +; Check a case where the result is tested for equality. +define void @f3(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 3) nobuiltin + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested for inequality. +define void @f4(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f4: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 4) nobuiltin + %cmp = icmp ne i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested via slt. +define void @f5(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f5: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 5) nobuiltin + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check a case where the result is tested via sgt. 
+define void @f6(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f6: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 6) nobuiltin + %cmp = icmp sgt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Check the upper end of the CLC range. Here the result is used both as +; an integer and for branching. +define i32 @f7(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f7: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 256) nobuiltin + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret i32 %res +} + +; 257 bytes needs two CLCs. +define i32 @f8(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f8: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257) nobuiltin + ret i32 %res +} + +; Test a comparison of 257 bytes in which the CC result can be used directly. +define void @f9(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f9: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 +entry: + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 257) nobuiltin + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Test the largest size that can use two CLCs. +define i32 @f10(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f10: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 512) nobuiltin + ret i32 %res +} + +; Test the smallest size that needs 3 CLCs. 
+define i32 @f11(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f11: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 513) nobuiltin + ret i32 %res +} + +; Test the largest size that can use 3 CLCs. +define i32 @f12(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f12: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 768) nobuiltin + ret i32 %res +} + +; The next size up uses a loop instead. We leave the more complicated +; loop tests to memcpy-01.ll, which shares the same form. +define i32 @f13(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f13: +; CHECK-NOT: clc +; CHECK: brasl %r14, memcmp +; CHECK: br %r14 + %res = call i32 @memcmp(i8 *%src1, i8 *%src2, i64 769) nobuiltin + ret i32 %res +} Index: test/CodeGen/SystemZ/strcmp-nobuiltin.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/strcmp-nobuiltin.ll @@ -0,0 +1,54 @@ +; Test that strcmp won't be converted to CLST if calls are +; marked with nobuiltin, e.g. for sanitizers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare signext i32 @strcmp(i8 *%src1, i8 *%src2) + +; Check a case where the result is used as an integer. +define i32 @f1(i8 *%src1, i8 *%src2) { +; CHECK-LABEL: f1: +; CHECK-NOT: clst +; CHECK: brasl %r14, strcmp +; CHECK: br %r14 + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) nobuiltin + ret i32 %res +} + +; Check a case where the result is tested for equality. +define void @f2(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f2: +; CHECK-NOT: clst +; CHECK: brasl %r14, strcmp +; CHECK: br %r14 + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) nobuiltin + %cmp = icmp eq i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret void +} + +; Test a case where the result is used both as an integer and for +; branching. 
+define i32 @f3(i8 *%src1, i8 *%src2, i32 *%dest) { +; CHECK-LABEL: f3: +; CHECK-NOT: clst +; CHECK: brasl %r14, strcmp +; CHECK: br %r14 +entry: + %res = call i32 @strcmp(i8 *%src1, i8 *%src2) nobuiltin + %cmp = icmp slt i32 %res, 0 + br i1 %cmp, label %exit, label %store + +store: + store i32 0, i32 *%dest + br label %exit + +exit: + ret i32 %res +} Index: test/CodeGen/SystemZ/strcpy-nobuiltin.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/strcpy-nobuiltin.ll @@ -0,0 +1,42 @@ +; Test that strcpy/stpcpy won't be converted to MVST if calls are +; marked with nobuiltin, e.g. for sanitizers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i8 *@strcpy(i8 *%dest, i8 *%src) +declare i8 *@stpcpy(i8 *%dest, i8 *%src) + +; Check strcpy. +define i8 *@f1(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f1: +; CHECK-NOT: mvst +; CHECK: brasl %r14, strcpy +; CHECK: br %r14 + %res = call i8 *@strcpy(i8 *%dest, i8 *%src) nobuiltin + ret i8 *%res +} + +; Check stpcpy. +define i8 *@f2(i8 *%dest, i8 *%src) { +; CHECK-LABEL: f2: +; CHECK-NOT: mvst +; CHECK: brasl %r14, stpcpy +; CHECK: br %r14 + %res = call i8 *@stpcpy(i8 *%dest, i8 *%src) nobuiltin + ret i8 *%res +} + +; Check correct operation with other loads and stores. The load must +; come before the call and the store afterwards. 
+define i32 @f3(i32 %dummy, i8 *%dest, i8 *%src, i32 *%resptr, i32 *%storeptr) { +; CHECK-LABEL: f3: +; CHECK-DAG: l [[REG1:%r[0-9]+]], 0(%r5) +; CHECK-NOT: mvst +; CHECK: brasl %r14, strcpy +; CHECK: mvhi 0(%r6), 0 +; CHECK: br %r14 + %res = load i32 , i32 *%resptr + %unused = call i8 *@strcpy(i8 *%dest, i8 *%src) nobuiltin + store i32 0, i32 *%storeptr + ret i32 %res +} Index: test/CodeGen/SystemZ/strlen-nobuiltin.ll =================================================================== --- /dev/null +++ test/CodeGen/SystemZ/strlen-nobuiltin.ll @@ -0,0 +1,25 @@ +; Test that strlen/strnlen won't be converted to SRST if calls are +; marked with nobuiltin, e.g. for sanitizers. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @strlen(i8 *%src) +declare i64 @strnlen(i8 *%src, i64 %len) + +define i64 @f1(i32 %dummy, i8 *%src) { +; CHECK-LABEL: f1: +; CHECK: brasl %r14, strlen +; CHECK: br %r14 + %res = call i64 @strlen(i8 *%src) nobuiltin + ret i64 %res +} + +; Likewise for strnlen. +define i64 @f2(i64 %len, i8 *%src) { +; CHECK-LABEL: f2: +; CHECK-NOT: srst +; CHECK: brasl %r14, strnlen +; CHECK: br %r14 + %res = call i64 @strnlen(i8 *%src, i64 %len) nobuiltin + ret i64 %res +} Index: test/CodeGen/X86/memcmp.ll =================================================================== --- test/CodeGen/X86/memcmp.ll +++ test/CodeGen/X86/memcmp.ll @@ -45,6 +45,21 @@ ; CHECK-NEXT: cmpl $28527, } +define void @memcmp2nb(i8* %X, i8* %Y, i32* nocapture %P) nounwind { +entry: + %0 = tail call i32 (...) @memcmp(i8* %X, i8* %Y, i32 2) nounwind nobuiltin ; [#uses=1] + %1 = icmp eq i32 %0, 0 ; [#uses=1] + br i1 %1, label %return, label %bb + +bb: ; preds = %entry + store i32 4, i32* %P, align 4 + ret void + +return: ; preds = %entry + ret void +; CHECK-LABEL: memcmp2nb: +; CHECK: callq +} define void @memcmp4(i8* %X, i8* %Y, i32* nocapture %P) nounwind { entry: