diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -2517,6 +2517,15 @@ return __builtin_addressof(value); } +``__builtin_function_start`` +----------------------------- + +``__builtin_function_start`` returns the address of a function body. Depending +on the platform and enabled compiler features, the returned address may +be different than the normally taken function address. For example, with +``-fsanitize=cfi``, the compiler replaces function addresses with references to +the CFI jump table, which may not be desirable in low-level system code. + ``__builtin_operator_new`` and ``__builtin_operator_delete`` ------------------------------------------------------------ diff --git a/clang/include/clang/AST/APValue.h b/clang/include/clang/AST/APValue.h --- a/clang/include/clang/AST/APValue.h +++ b/clang/include/clang/AST/APValue.h @@ -176,6 +176,8 @@ unsigned getVersion() const; QualType getTypeInfoType() const; QualType getDynamicAllocType() const; + bool isNoCFIValue() const; + void setNoCFIValue(bool NoCFI); QualType getType() const; @@ -190,6 +192,7 @@ PtrTy Ptr; struct LocalState { unsigned CallIndex, Version; + bool NoCFIValue; }; union { LocalState Local; diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -1561,6 +1561,7 @@ // Clang builtins (not available in GCC). BUILTIN(__builtin_addressof, "v*v&", "nct") +BUILTIN(__builtin_function_start, "v*v&", "nct") BUILTIN(__builtin_operator_new, "v*z", "tc") BUILTIN(__builtin_operator_delete, "vv*", "tn") BUILTIN(__builtin_char_memchr, "c*cC*iz", "n") diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -838,6 +838,9 @@ "%2, but the corresponding specifier may require size %3">, InGroup; +def err_function_start_invalid_type: Error< + "argument must be a function">; + /// main() // static main() is not an error in C, just in C++. def warn_static_main : Warning<"'main' should not be declared static">, diff --git a/clang/lib/AST/APValue.cpp b/clang/lib/AST/APValue.cpp --- a/clang/lib/AST/APValue.cpp +++ b/clang/lib/AST/APValue.cpp @@ -40,9 +40,11 @@ "Type is insufficiently aligned"); APValue::LValueBase::LValueBase(const ValueDecl *P, unsigned I, unsigned V) - : Ptr(P ? cast(P->getCanonicalDecl()) : nullptr), Local{I, V} {} + : Ptr(P ? cast(P->getCanonicalDecl()) : nullptr), + Local{I, V, + /*NoCFIValue =*/false} {} APValue::LValueBase::LValueBase(const Expr *P, unsigned I, unsigned V) - : Ptr(P), Local{I, V} {} + : Ptr(P), Local{I, V, /*NoCFIValue=*/false} {} APValue::LValueBase APValue::LValueBase::getDynamicAlloc(DynamicAllocLValue LV, QualType Type) { @@ -124,6 +126,15 @@ return QualType::getFromOpaquePtr(DynamicAllocType); } +bool APValue::LValueBase::isNoCFIValue() const { + return (is() ? Local.NoCFIValue : false); +} + +void APValue::LValueBase::setNoCFIValue(bool NoCFI) { + if (is()) + Local.NoCFIValue = NoCFI; +} + void APValue::LValueBase::Profile(llvm::FoldingSetNodeID &ID) const { ID.AddPointer(Ptr.getOpaqueValue()); if (is() || is()) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -8982,6 +8982,11 @@ switch (BuiltinOp) { case Builtin::BI__builtin_addressof: return evaluateLValue(E->getArg(0), Result); + case Builtin::BI__builtin_function_start: + if (!evaluateLValue(E->getArg(0), Result)) + return false; + Result.Base.setNoCFIValue(true); + return true; case Builtin::BI__builtin_assume_aligned: { // We need to be very careful here because: if the pointer does not have the // asserted alignment, then the behavior is undefined, and undefined @@ -12665,6 +12670,10 @@ return Success(CmpResult::Unequal, E); } + // Don't allow the comparison of NoCFI values + if (LHSValue.Base.isNoCFIValue() || RHSValue.Base.isNoCFIValue()) + return Error(E); + const CharUnits &LHSOffset = LHSValue.getLValueOffset(); const CharUnits &RHSOffset = RHSValue.getLValueOffset(); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4477,6 +4477,11 @@ } case Builtin::BI__builtin_addressof: return RValue::get(EmitLValue(E->getArg(0)).getPointer(*this)); + case Builtin::BI__builtin_function_start: + return RValue::get(llvm::ConstantExpr::getBitCast( + llvm::NoCFIValue::get(cast( + EmitLValue(E->getArg(0)).getPointer(*this)->stripPointerCasts())), + llvm::Type::getInt8PtrTy(getLLVMContext()))); case Builtin::BI__builtin_operator_new: return EmitBuiltinNewDeleteCall( E->getCallee()->getType()->castAs(), E, false); diff --git a/clang/lib/CodeGen/CGExprConstant.cpp b/clang/lib/CodeGen/CGExprConstant.cpp --- a/clang/lib/CodeGen/CGExprConstant.cpp +++ b/clang/lib/CodeGen/CGExprConstant.cpp @@ -1889,8 +1889,15 @@ if (D->hasAttr()) return CGM.GetWeakRefReference(D).getPointer(); - if (auto FD = dyn_cast(D)) - return CGM.GetAddrOfFunction(FD); + if (const auto *FD = dyn_cast(D)) { + llvm::Constant *C = CGM.GetAddrOfFunction(FD); + if (base.isNoCFIValue()) + return llvm::ConstantExpr::getBitCast( + llvm::NoCFIValue::get( + cast(C->stripPointerCasts())), + llvm::Type::getInt8PtrTy(CGM.getLLVMContext())); + return C; + } if (auto VD = dyn_cast(D)) { // We can never refer to a variable with local storage. diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -195,6 +195,35 @@ return false; } +/// Check that the argument to __builtin_function_start is a function. +static bool SemaBuiltinSymbolAddress(Sema &S, CallExpr *TheCall) { + if (checkArgCount(S, TheCall, 1)) + return true; + + Expr *E = TheCall->getArg(0); + + // Allow C++ member functions, e.g. __builtin_function_start(&Foo::Bar) + if (UnaryOperator *UnaryOp = dyn_cast_or_null(E)) + if (UnaryOp->getOpcode() == UnaryOperator::Opcode::UO_AddrOf) + E = UnaryOp->getSubExpr(); + + // Allow only function declarations + if (E->getStmtClass() != Stmt::DeclRefExprClass || + !isa(cast(E)->getDecl())) { + S.Diag(E->getBeginLoc(), diag::err_function_start_invalid_type) + << E->getSourceRange(); + return true; + } + + ExprResult Arg(E); + QualType ResultType = S.CheckAddressOfOperand(Arg, TheCall->getBeginLoc()); + if (ResultType.isNull()) + return true; + + TheCall->setArg(0, Arg.get()); + return false; +} + /// Check the number of arguments and set the result type to /// the argument type. static bool SemaBuiltinPreserveAI(Sema &S, CallExpr *TheCall) { @@ -1874,6 +1903,10 @@ if (SemaBuiltinAddressof(*this, TheCall)) return ExprError(); break; + case Builtin::BI__builtin_function_start: + if (SemaBuiltinSymbolAddress(*this, TheCall)) + return ExprError(); + break; case Builtin::BI__builtin_is_aligned: case Builtin::BI__builtin_align_up: case Builtin::BI__builtin_align_down: diff --git a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/BuiltinFunctionChecker.cpp @@ -66,7 +66,8 @@ case Builtin::BI__builtin_expect: case Builtin::BI__builtin_expect_with_probability: case Builtin::BI__builtin_assume_aligned: - case Builtin::BI__builtin_addressof: { + case Builtin::BI__builtin_addressof: + case Builtin::BI__builtin_function_start: { // For __builtin_unpredictable, __builtin_expect, // __builtin_expect_with_probability and __builtin_assume_aligned, // just return the value of the subexpression. diff --git a/clang/test/CodeGen/builtin-function-start.cpp b/clang/test/CodeGen/builtin-function-start.cpp new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/builtin-function-start.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -fsanitize=cfi-icall -o - %s | FileCheck %s + +#if !__has_builtin(__builtin_function_start) +#error "missing __builtin_function_start" +#endif + +void a(void) {} +void b(void) {} +void c(void *p) {} + +class A { +public: + void f(); + virtual void g(); + static void h(); +}; + +void A::f() {} +void A::g() {} +void A::h() {} + +// CHECK: @e = global i8* bitcast (void ()* no_cfi @_Z1av to i8*) +const void *e = __builtin_function_start(a); +// CHECK: @f = global [2 x i8*] [i8* bitcast (void ()* @_Z1bv to i8*), i8* bitcast (void ()* no_cfi @_Z1bv to i8*)] +void *f[] = { (void *)b, __builtin_function_start(b) }; + +void d(void) { + // CHECK: store i8* bitcast (void ()* no_cfi @_Z1bv to i8*), i8** %g + void *g = __builtin_function_start(b); + // call void @_Z1cPv(i8* bitcast (void ()* no_cfi @_Z1av to i8*)) + c(__builtin_function_start(a)); + + // CHECK: store i8* bitcast (void (%class.A*)* no_cfi @_ZN1A1fEv to i8*), i8** %Af + void *Af = __builtin_function_start(&A::f); + // CHECK: store i8* bitcast (void (%class.A*)* no_cfi @_ZN1A1gEv to i8*), i8** %Ag + void *Ag = __builtin_function_start(&A::g); + // CHECK: store i8* bitcast (void ()* no_cfi @_ZN1A1hEv to i8*), i8** %Ah + void *Ah = __builtin_function_start(&A::h); +} diff --git a/clang/test/SemaCXX/builtins.cpp b/clang/test/SemaCXX/builtins.cpp --- a/clang/test/SemaCXX/builtins.cpp +++ b/clang/test/SemaCXX/builtins.cpp @@ -39,6 +39,11 @@ S *ptmp = __builtin_addressof(S{}); // expected-error {{taking the address of a temporary}} } +namespace function_start { +void a(void) {} +static_assert(__builtin_function_start(a) == a, ""); // expected-error {{static_assert expression is not an integral constant expression}} +} // namespace function_start + void no_ms_builtins() { __assume(1); // expected-error {{use of undeclared}} __noop(1); // expected-error {{use of undeclared}}