diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -3177,7 +3177,8 @@ const FunctionDecl *Definition; return hasBody(Definition) && Definition->isInlineSpecified() && - Definition->hasAttr(); + Definition->hasAttr() && + Definition->hasAttr(); } bool FunctionDecl::isDestroyingOperatorDelete() const { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4891,12 +4891,28 @@ const FunctionDecl *FD = cast(GD.getDecl()); if (auto builtinID = FD->getBuiltinID()) { - // Replaceable builtin provide their own implementation of a builtin. Unless - // we are in the builtin implementation itself, don't call the actual - // builtin. If we are in the builtin implementation, avoid trivial infinite + std::string FDInlineName = (FD->getName() + ".inline").str(); + // When directly calling an inline builtin, call it through its mangled + // name to make it clear it's not the actual builtin. + if (FD->isInlineBuiltinDeclaration() && + CGF.CurFn->getName() != FDInlineName) { + llvm::Constant *CalleePtr = EmitFunctionDeclPointer(CGF.CGM, GD); + llvm::Function *Fn = llvm::cast(CalleePtr); + llvm::Module *M = Fn->getParent(); + llvm::Function *Clone = M->getFunction(FDInlineName); + if (!Clone) { + Clone = llvm::Function::Create(Fn->getFunctionType(), + llvm::GlobalValue::InternalLinkage, + Fn->getAddressSpace(), FDInlineName, M); + Clone->addFnAttr(llvm::Attribute::AlwaysInline); + } + return CGCallee::forDirect(Clone, GD); + } + + // Replaceable builtins provide their own implementation of a builtin. If we + // are in an inline builtin implementation, avoid trivial infinite // recursion. 
- if (!FD->isInlineBuiltinDeclaration() || - CGF.CurFn->getName() == FD->getName()) + else return CGCallee::forBuiltin(builtinID, FD); } @@ -4905,6 +4921,7 @@ FD->hasAttr()) CalleePtr = CGF.CGM.getCUDARuntime().getKernelStub( cast(CalleePtr->stripPointerCasts())); + return CGCallee::forDirect(CalleePtr, GD); } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/CRC.h" #include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" + using namespace clang; using namespace CodeGen; @@ -1294,10 +1295,22 @@ FunctionArgList Args; QualType ResTy = BuildFunctionArgList(GD, Args); - // Give a different name to inline builtin to avoid conflict with actual - // builtins. - if (FD->isInlineBuiltinDeclaration() && Fn) - Fn->setName(Fn->getName() + ".inline"); + // When generating code for a builtin with an inline declaration, use a + // mangled name to hold the actual body, while keeping an external definition + // in case the function pointer is referenced somewhere. + if (FD->isInlineBuiltinDeclaration() && Fn) { + std::string FDInlineName = (Fn->getName() + ".inline").str(); + llvm::Module *M = Fn->getParent(); + llvm::Function *Clone = M->getFunction(FDInlineName); + if (!Clone) { + Clone = llvm::Function::Create(Fn->getFunctionType(), + llvm::GlobalValue::InternalLinkage, + Fn->getAddressSpace(), FDInlineName, M); + Clone->addFnAttr(llvm::Attribute::AlwaysInline); + } + Fn->setLinkage(llvm::GlobalValue::ExternalLinkage); + Fn = Clone; + } // Check if we should generate debug info for this function. 
if (FD->hasAttr()) { diff --git a/clang/test/CodeGen/memcmp-inline-builtin-to-asm.c b/clang/test/CodeGen/memcmp-inline-builtin-to-asm.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/memcmp-inline-builtin-to-asm.c @@ -0,0 +1,36 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// RUN: %clang_cc1 -triple x86_64 -S -emit-llvm -o - %s | opt -S -verify | FileCheck %s +// +// Verifies that clang detects memcmp inline version and uses it instead of the builtin. + +typedef unsigned long size_t; + +// Clang requires these attributes for a function to be redefined. +#define AVAILABLE_EXTERNALLY extern inline __attribute__((always_inline)) __attribute__((gnu_inline)) + +const void *con_unify_unimap_p1; + +AVAILABLE_EXTERNALLY +int memcmp(const void *p, const void *q, unsigned long size) { + return __builtin_memcmp(p, q, size); +} + +// CHECK-LABEL: @con_unify_unimap_q1( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[P_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[Q_ADDR_I:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[SIZE_ADDR_I:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i8*, i8** @con_unify_unimap_p1, align 8 +// CHECK-NEXT: store i8* [[TMP0]], i8** [[P_ADDR_I]], align 8 +// CHECK-NEXT: store i8* bitcast (i32 ()* @con_unify_unimap_q1 to i8*), i8** [[Q_ADDR_I]], align 8 +// CHECK-NEXT: store i64 4, i64* [[SIZE_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i8*, i8** [[P_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** [[Q_ADDR_I]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i64, i64* [[SIZE_ADDR_I]], align 8 +// CHECK-NEXT: [[CALL_I:%.*]] = call i32 @memcmp(i8* [[TMP1]], i8* [[TMP2]], i64 [[TMP3]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: ret i32 [[CALL_I]] +// +int con_unify_unimap_q1(void) { + return memcmp(con_unify_unimap_p1, con_unify_unimap_q1, sizeof(int)); +} diff --git a/clang/test/CodeGen/memcpy-inline-builtin-no-extern.c b/clang/test/CodeGen/memcpy-inline-builtin-no-extern.c new 
file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/memcpy-inline-builtin-no-extern.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple x86_64 -S -emit-llvm -disable-llvm-passes -o - %s | FileCheck %s +// +// Verifies that clang-generated *.inline are flagged as internal. + +typedef unsigned long size_t; + +#define AVAILABLE_EXTERNALLY extern inline __attribute__((always_inline)) __attribute__((gnu_inline)) +// Clang recognizes an inline builtin and renames it to memcmp.inline to prevent conflict with builtins. +AVAILABLE_EXTERNALLY int memcmp(const void *a, const void *b, size_t c) { + return __builtin_memcmp(a, b, c); +} + +// CHECK: internal{{.*}}memcmp.inline +int bar(const void *a, const void *b, size_t c) { + return memcmp(a, b, c); +} + +// Note that extern has been omitted here. +#define TRIPLE_INLINE inline __attribute__((always_inline)) __attribute__((gnu_inline)) + +// Clang recognizes an inline builtin and renames it to memcpy.inline to prevent conflict with builtins. +TRIPLE_INLINE void *memcpy(void *a, const void *b, size_t c) { + return __builtin_memcpy(a, b, c); +} + +// CHECK: internal{{.*}}memcpy.inline +void *foo(void *a, const void *b, size_t c) { + return memcpy(a, b, c); +} diff --git a/clang/test/CodeGen/memcpy-inline-builtin.c b/clang/test/CodeGen/memcpy-inline-builtin.c --- a/clang/test/CodeGen/memcpy-inline-builtin.c +++ b/clang/test/CodeGen/memcpy-inline-builtin.c @@ -32,13 +32,39 @@ // CHECK-NEXT: store i8* [[TMP0]], i8** [[A_ADDR_I]], align 8 // CHECK-NEXT: store i8* [[TMP1]], i8** [[B_ADDR_I]], align 8 // CHECK-NEXT: store i64 [[TMP2]], i64* [[C_ADDR_I]], align 8 -// CHECK-NEXT: call void asm sideeffect "# memcpy.inline marker", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR2:[0-9]+]], !srcloc !2 +// CHECK-NEXT: call void asm sideeffect "# memcpy.inline marker", "~{dirflag},~{fpsr},~{flags}"() #[[ATTR4:[0-9]+]], !srcloc !2 // CHECK-NEXT: [[TMP3:%.*]] = load i8*, i8** [[A_ADDR_I]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** 
[[B_ADDR_I]], align 8 // CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[C_ADDR_I]], align 8 -// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP3]], i8* align 1 [[TMP4]], i64 [[TMP5]], i1 false) #[[ATTR2]] +// CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 1 [[TMP3]], i8* align 1 [[TMP4]], i64 [[TMP5]], i1 false) #[[ATTR4]] +// CHECK-NEXT: ret i8* [[TMP3]] +// +void *foo(void *a, const void *b, size_t c) { + return memcpy(a, b, c); +} + +// CHECK-LABEL: @bar( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[CPY:%.*]] = alloca i8* (i8*, i8*, i64)*, align 8 +// CHECK-NEXT: store i8* [[A:%.*]], i8** [[A_ADDR]], align 8 +// CHECK-NEXT: store i8* [[B:%.*]], i8** [[B_ADDR]], align 8 +// CHECK-NEXT: store i64 [[C:%.*]], i64* [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[C_ADDR]], align 8 +// CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[TMP0]], 10 +// CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[CMP]] to i64 +// CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i8* (i8*, i8*, i64)* @memcpy, i8* (i8*, i8*, i64)* @foo +// CHECK-NEXT: store i8* (i8*, i8*, i64)* [[COND]], i8* (i8*, i8*, i64)** [[CPY]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i8* (i8*, i8*, i64)*, i8* (i8*, i8*, i64)** [[CPY]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i8*, i8** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = load i8*, i8** [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP5:%.*]] = load i64, i64* [[C_ADDR]], align 8 +// CHECK-NEXT: [[CALL:%.*]] = call i8* [[TMP2]](i8* [[TMP3]], i8* [[TMP4]], i64 [[TMP5]]) // CHECK-NEXT: ret void // -void foo(void *a, const void *b, size_t c) { - memcpy(a, b, c); +void bar(void *a, const void *b, size_t c) { + void *(*cpy)(void *, const void *, size_t) = c > 10 ? 
memcpy : foo; + cpy(a, b, c); } diff --git a/clang/test/CodeGen/memcpy-nobuiltin.c b/clang/test/CodeGen/memcpy-nobuiltin.c --- a/clang/test/CodeGen/memcpy-nobuiltin.c +++ b/clang/test/CodeGen/memcpy-nobuiltin.c @@ -1,11 +1,11 @@ // RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -DWITH_DECL | FileCheck --check-prefix=CHECK-WITH-DECL %s // RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -UWITH_DECL | FileCheck --check-prefix=CHECK-NO-DECL %s -// RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -DWITH_SELF_REFERENCE_DECL | FileCheck --check-prefix=CHECK-SELF-REF-DECL %s +// RUN: %clang_cc1 -verify -S -emit-llvm -o- %s -isystem %S -disable-llvm-passes -DWITH_SELF_REFERENCE_DECL | FileCheck --check-prefix=CHECK-SELF-REF-DECL %s // // CHECK-WITH-DECL-NOT: @llvm.memcpy // CHECK-NO-DECL: @llvm.memcpy // CHECK-SELF-REF-DECL-LABEL: define {{.*}}i8* @memcpy.inline -// CHECK-SELF-REF-DECL: @memcpy( +// CHECK-SELF-REF-DECL: @llvm.memcpy.{{.*}}( // #include void test(void *dest, void const *from, size_t n) { diff --git a/clang/test/CodeGen/memcpy-nobuiltin.inc b/clang/test/CodeGen/memcpy-nobuiltin.inc --- a/clang/test/CodeGen/memcpy-nobuiltin.inc +++ b/clang/test/CodeGen/memcpy-nobuiltin.inc @@ -2,7 +2,7 @@ extern void *memcpy(void *dest, void const *from, size_t n); #ifdef WITH_DECL -inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from, size_t n) { +inline __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *dest, void const *from, size_t n) { char const *ifrom = from; char *idest = dest; while (n--) @@ -11,7 +11,7 @@ } #endif #ifdef WITH_SELF_REFERENCE_DECL -inline __attribute__((always_inline)) void *memcpy(void *dest, void const *from, size_t n) { +inline __attribute__((always_inline)) __attribute__((gnu_inline)) void *memcpy(void *dest, void const *from, size_t n) { if (n != 0) memcpy(dest, from, n); return dest; diff --git a/clang/test/CodeGen/pr9614.c b/clang/test/CodeGen/pr9614.c --- 
a/clang/test/CodeGen/pr9614.c +++ b/clang/test/CodeGen/pr9614.c @@ -32,14 +32,14 @@ // CHECK-LABEL: define{{.*}} void @f() // CHECK: call void @foo() -// CHECK: call i32 @abs(i32 %0) +// CHECK: call i32 @abs(i32 0) // CHECK: call i8* @strrchr( // CHECK: call void @llvm.prefetch.p0i8( // CHECK: call i8* @memchr( // CHECK: ret void // CHECK: declare void @foo() +// CHECK: declare i32 @abs(i32 // CHECK: declare i8* @strrchr(i8*, i32) // CHECK: declare i8* @memchr( -// CHECK: declare i32 @abs(i32 // CHECK: declare void @llvm.prefetch.p0i8(