diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2561,6 +2561,19 @@ let Documentation = [VectorCallDocs]; } +def ZeroCallUsedRegs : InheritableAttr { + let Spellings = [GCC<"zero_call_used_regs">]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Args = [ + EnumArgument<"ZeroCallUsedRegs", "ZeroCallUsedRegsKind", + ["skip", "used-gpr-arg", "used-gpr", "used-arg", "used", + "all-gpr-arg", "all-gpr", "all-arg", "all"], + ["Skip", "UsedGPRArg", "UsedGPR", "UsedArg", "Used", + "AllGPRArg", "AllGPR", "AllArg", "All"]> + ]; + let Documentation = [ZeroCallUsedRegsDocs]; +} + def Pascal : DeclOrTypeAttr { let Spellings = [Clang<"pascal">, Keyword<"__pascal">, Keyword<"_pascal">]; // let Subjects = [Function, ObjCMethod]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -6266,3 +6266,41 @@ } }]; } + +def ZeroCallUsedRegsDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +This attribute, when attached to a function, causes the compiler to zero a +subset of all call-used registers before the function returns. It's used to +increase program security by either mitigating `Return-Oriented Programming`_ +(ROP) attacks or preventing information leakage through registers. + +The term "call-used" means registers which are not guaranteed to be preserved +unchanged for the caller by the current calling convention. This could also be +described as "caller-saved" or "not callee-saved". + +The `choice` parameter gives the programmer flexibility to choose the subset +of the call-used registers to be zeroed: + +- ``skip`` doesn't zero any call-used registers. This choice overrides any + command-line arguments. +- ``used`` only zeros call-used registers used in the function.
By ``used``, we + mean a register whose contents have been set or referenced in the function. +- ``used-gpr`` only zeros call-used GPR registers used in the function. +- ``used-arg`` only zeros call-used registers used to pass arguments to the + function. +- ``used-gpr-arg`` only zeros call-used GPR registers used to pass arguments to + the function. +- ``all`` zeros all call-used registers. +- ``all-gpr`` zeros all call-used GPR registers. +- ``all-arg`` zeros all call-used registers used to pass arguments to the + function. +- ``all-gpr-arg`` zeros all call-used GPR registers used to pass arguments to + the function. + +The default for the attribute is controlled by the ``-fzero-call-used-regs`` +flag. + +.. _Return-Oriented Programming: https://en.wikipedia.org/wiki/Return-oriented_programming + }]; +} diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -464,6 +464,10 @@ /// Whether to skip RAX setup when passing variable arguments (x86 only). CODEGENOPT(SkipRaxSetup, 1, 0) +/// Whether to zero out call-used registers before returning.
+ENUM_CODEGENOPT(ZeroCallUsedRegs, llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind, + 5, llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Skip) + #undef CODEGENOPT #undef ENUM_CODEGENOPT #undef VALUE_CODEGENOPT diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2907,6 +2907,14 @@ HelpText<"Enable matrix data type and related builtin functions">, MarshallingInfoFlag<LangOpts<"MatrixTypes">>; +def fzero_call_used_regs_EQ + : Joined<["-"], "fzero-call-used-regs=">, Group<f_Group>, Flags<[CC1Option]>, + HelpText<"Clear call-used registers upon function return.">, + Values<"skip,used-gpr-arg,used-gpr,used-arg,used,all-gpr-arg,all-gpr,all-arg,all">, + NormalizedValues<["Skip", "UsedGPRArg", "UsedGPR", "UsedArg", "Used", + "AllGPRArg", "AllGPR", "AllArg", "All"]>, + NormalizedValuesScope<"llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind">, + MarshallingInfoEnum<CodeGenOpts<"ZeroCallUsedRegs">, "Skip">; def fdebug_types_section: Flag <["-"], "fdebug-types-section">, Group<f_Group>, HelpText<"Place debug types in their own section (ELF Only)">; diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1868,6 +1868,37 @@ if (CodeGenOpts.SpeculativeLoadHardening) FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); + + // Add zero-call-used-regs attribute.
+ switch (CodeGenOpts.getZeroCallUsedRegs()) { + case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Skip: + FuncAttrs.removeAttribute("zero-call-used-regs"); + break; + case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::UsedGPRArg: + FuncAttrs.addAttribute("zero-call-used-regs", "used-gpr-arg"); + break; + case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::UsedGPR: + FuncAttrs.addAttribute("zero-call-used-regs", "used-gpr"); + break; + case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::UsedArg: + FuncAttrs.addAttribute("zero-call-used-regs", "used-arg"); + break; + case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::Used: + FuncAttrs.addAttribute("zero-call-used-regs", "used"); + break; + case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::AllGPRArg: + FuncAttrs.addAttribute("zero-call-used-regs", "all-gpr-arg"); + break; + case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::AllGPR: + FuncAttrs.addAttribute("zero-call-used-regs", "all-gpr"); + break; + case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::AllArg: + FuncAttrs.addAttribute("zero-call-used-regs", "all-arg"); + break; + case llvm::ZeroCallUsedRegs::ZeroCallUsedRegsKind::All: + FuncAttrs.addAttribute("zero-call-used-regs", "all"); + break; + } } if (getLangOpts().assumeFunctionsAreConvergent()) { @@ -2156,6 +2187,15 @@ FuncAttrs.addAttribute(llvm::Attribute::SpeculativeLoadHardening); if (TargetDecl->hasAttr<NoSplitStackAttr>()) FuncAttrs.removeAttribute("split-stack"); + if (TargetDecl->hasAttr<ZeroCallUsedRegsAttr>()) { + // A function "__attribute__((...))" overrides the command-line flag. + auto Kind = + TargetDecl->getAttr<ZeroCallUsedRegsAttr>()->getZeroCallUsedRegs(); + FuncAttrs.removeAttribute("zero-call-used-regs"); + FuncAttrs.addAttribute( + "zero-call-used-regs", + ZeroCallUsedRegsAttr::ConvertZeroCallUsedRegsKindToStr(Kind)); + } // Add NonLazyBind attribute to function declarations when -fno-plt // is used.
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -981,6 +981,10 @@ CGM.getCodeGenOpts().StackAlignment)) Fn->addFnAttr("stackrealign"); + // "main" doesn't need to zero out call-used registers. + if (FD && FD->isMain()) + Fn->removeFnAttr("zero-call-used-regs"); + llvm::BasicBlock *EntryBB = createBasicBlock("entry", CurFn); // Create a marker to make it easy to insert allocas into the entryblock diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5888,6 +5888,16 @@ Args.AddLastArg(CmdArgs, options::OPT_fdigraphs, options::OPT_fno_digraphs); Args.AddLastArg(CmdArgs, options::OPT_femulated_tls, options::OPT_fno_emulated_tls); + Args.AddLastArg(CmdArgs, options::OPT_fzero_call_used_regs_EQ); + + if (Arg *A = Args.getLastArg(options::OPT_fzero_call_used_regs_EQ)) { + // FIXME: There's no reason for this to be restricted to X86. The backend + // code needs to be changed to include the appropriate function calls + // automatically. + if (!Triple.isX86()) + D.Diag(diag::err_drv_unsupported_opt_for_target) + << A->getAsString(Args) << TripleStr; + } // AltiVec-like language extensions aren't relevant for assembling. if (!isa<PreprocessJobAction>(JA) || Output.getType() != types::TY_PP_Asm) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7837,6 +7837,24 @@ D->addAttr(::new (S.Context) OpenCLAccessAttr(S.Context, AL)); } +static void handleZeroCallUsedRegsAttr(Sema &S, Decl *D, const ParsedAttr &AL) { + // Check that the argument is a string literal.
+ StringRef KindStr; + SourceLocation LiteralLoc; + if (!S.checkStringLiteralArgumentAttr(AL, 0, KindStr, &LiteralLoc)) + return; + + ZeroCallUsedRegsAttr::ZeroCallUsedRegsKind Kind; + if (!ZeroCallUsedRegsAttr::ConvertStrToZeroCallUsedRegsKind(KindStr, Kind)) { + S.Diag(LiteralLoc, diag::warn_attribute_type_not_supported) + << AL << KindStr; + return; + } + + D->dropAttr<ZeroCallUsedRegsAttr>(); + D->addAttr(ZeroCallUsedRegsAttr::Create(S.Context, Kind, AL)); +} + static void handleSYCLKernelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // The 'sycl_kernel' attribute applies only to function templates. const auto *FD = cast<FunctionDecl>(D); @@ -8581,6 +8599,9 @@ case ParsedAttr::AT_InternalLinkage: handleInternalLinkageAttr(S, D, AL); break; + case ParsedAttr::AT_ZeroCallUsedRegs: + handleZeroCallUsedRegsAttr(S, D, AL); + break; // Microsoft attributes: case ParsedAttr::AT_LayoutVersion: diff --git a/clang/test/CodeGen/zero-call-used-regs.c b/clang/test/CodeGen/zero-call-used-regs.c new file mode 100644 --- /dev/null +++ b/clang/test/CodeGen/zero-call-used-regs.c @@ -0,0 +1,249 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -fzero-call-used-regs=skip -o - | FileCheck %s --check-prefix CHECK-SKIP +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -fzero-call-used-regs=used-gpr-arg -o - | FileCheck %s --check-prefix CHECK-USED-GPR-ARG +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -fzero-call-used-regs=used-gpr -o - | FileCheck %s --check-prefix CHECK-USED-GPR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -fzero-call-used-regs=used-arg -o - | FileCheck %s --check-prefix CHECK-USED-ARG +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -fzero-call-used-regs=used -o - | FileCheck %s --check-prefix CHECK-USED +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -fzero-call-used-regs=all-gpr-arg -o - | FileCheck %s --check-prefix CHECK-ALL-GPR-ARG +// RUN: %clang_cc1 -triple
x86_64-unknown-linux-gnu %s -emit-llvm -fzero-call-used-regs=all-gpr -o - | FileCheck %s --check-prefix CHECK-ALL-GPR +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -fzero-call-used-regs=all-arg -o - | FileCheck %s --check-prefix CHECK-ALL-ARG +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu %s -emit-llvm -fzero-call-used-regs=all -o - | FileCheck %s --check-prefix CHECK-ALL + +// -fzero-call-used-regs=skip: +// +// CHECK-SKIP: define {{.*}} @no_attribute({{.*}} #[[ATTR_NUM:[0-9]*]] +// CHECK-SKIP: define {{.*}} @skip_test({{.*}} #[[ATTR_NUM_SKIP:[0-9]*]] +// CHECK-SKIP: define {{.*}} @used_gpr_arg_test({{.*}} #[[ATTR_NUM_USED_GPR_ARG:[0-9]*]] +// CHECK-SKIP: define {{.*}} @used_gpr_test({{.*}} #[[ATTR_NUM_USED_GPR:[0-9]*]] +// CHECK-SKIP: define {{.*}} @used_arg_test({{.*}} #[[ATTR_NUM_USED_ARG:[0-9]*]] +// CHECK-SKIP: define {{.*}} @used_test({{.*}} #[[ATTR_NUM_USED:[0-9]*]] +// CHECK-SKIP: define {{.*}} @all_gpr_arg_test({{.*}} #[[ATTR_NUM_ALL_GPR_ARG:[0-9]*]] +// CHECK-SKIP: define {{.*}} @all_gpr_test({{.*}} #[[ATTR_NUM_ALL_GPR:[0-9]*]] +// CHECK-SKIP: define {{.*}} @all_arg_test({{.*}} #[[ATTR_NUM_ALL_ARG:[0-9]*]] +// CHECK-SKIP: define {{.*}} @all_test({{.*}} #[[ATTR_NUM_ALL:[0-9]*]] +// +// CHECK-SKIP-NOT: attributes #[[ATTR_NUM]] = {{.*}} "zero-call-used-regs"= +// CHECK-SKIP: attributes #[[ATTR_NUM_SKIP]] = {{.*}} "zero-call-used-regs"="skip" +// CHECK-SKIP: attributes #[[ATTR_NUM_USED_GPR_ARG]] = {{.*}} "zero-call-used-regs"="used-gpr-arg" +// CHECK-SKIP: attributes #[[ATTR_NUM_USED_GPR]] = {{.*}} "zero-call-used-regs"="used-gpr" +// CHECK-SKIP: attributes #[[ATTR_NUM_USED_ARG]] = {{.*}} "zero-call-used-regs"="used-arg" +// CHECK-SKIP: attributes #[[ATTR_NUM_USED]] = {{.*}} "zero-call-used-regs"="used" +// CHECK-SKIP: attributes #[[ATTR_NUM_ALL_GPR_ARG]] = {{.*}} "zero-call-used-regs"="all-gpr-arg" +// CHECK-SKIP: attributes #[[ATTR_NUM_ALL_GPR]] = {{.*}} "zero-call-used-regs"="all-gpr" +// CHECK-SKIP: attributes 
#[[ATTR_NUM_ALL_ARG]] = {{.*}} "zero-call-used-regs"="all-arg" +// CHECK-SKIP: attributes #[[ATTR_NUM_ALL]] = {{.*}} "zero-call-used-regs"="all" + +// -fzero-call-used-regs=used-gpr-arg: +// +// CHECK-USED-GPR-ARG: define {{.*}} @no_attribute({{.*}} #[[ATTR_NUM_USED_GPR_ARG:[0-9]*]] +// CHECK-USED-GPR-ARG: define {{.*}} @skip_test({{.*}} #[[ATTR_NUM_SKIP:[0-9]*]] +// CHECK-USED-GPR-ARG: define {{.*}} @used_gpr_arg_test({{.*}} #[[ATTR_NUM_USED_GPR_ARG]] +// CHECK-USED-GPR-ARG: define {{.*}} @used_gpr_test({{.*}} #[[ATTR_NUM_USED_GPR:[0-9]*]] +// CHECK-USED-GPR-ARG: define {{.*}} @used_arg_test({{.*}} #[[ATTR_NUM_USED_ARG:[0-9]*]] +// CHECK-USED-GPR-ARG: define {{.*}} @used_test({{.*}} #[[ATTR_NUM_USED:[0-9]*]] +// CHECK-USED-GPR-ARG: define {{.*}} @all_gpr_arg_test({{.*}} #[[ATTR_NUM_ALL_GPR_ARG:[0-9]*]] +// CHECK-USED-GPR-ARG: define {{.*}} @all_gpr_test({{.*}} #[[ATTR_NUM_ALL_GPR:[0-9]*]] +// CHECK-USED-GPR-ARG: define {{.*}} @all_arg_test({{.*}} #[[ATTR_NUM_ALL_ARG:[0-9]*]] +// CHECK-USED-GPR-ARG: define {{.*}} @all_test({{.*}} #[[ATTR_NUM_ALL:[0-9]*]] +// +// CHECK-USED-GPR-ARG: attributes #[[ATTR_NUM_USED_GPR_ARG]] = {{.*}} "zero-call-used-regs"="used-gpr-arg" +// CHECK-USED-GPR-ARG: attributes #[[ATTR_NUM_SKIP]] = {{.*}} "zero-call-used-regs"="skip" +// CHECK-USED-GPR-ARG: attributes #[[ATTR_NUM_USED_GPR]] = {{.*}} "zero-call-used-regs"="used-gpr" +// CHECK-USED-GPR-ARG: attributes #[[ATTR_NUM_USED_ARG]] = {{.*}} "zero-call-used-regs"="used-arg" +// CHECK-USED-GPR-ARG: attributes #[[ATTR_NUM_USED]] = {{.*}} "zero-call-used-regs"="used" +// CHECK-USED-GPR-ARG: attributes #[[ATTR_NUM_ALL_GPR_ARG]] = {{.*}} "zero-call-used-regs"="all-gpr-arg" +// CHECK-USED-GPR-ARG: attributes #[[ATTR_NUM_ALL_GPR]] = {{.*}} "zero-call-used-regs"="all-gpr" +// CHECK-USED-GPR-ARG: attributes #[[ATTR_NUM_ALL_ARG]] = {{.*}} "zero-call-used-regs"="all-arg" +// CHECK-USED-GPR-ARG: attributes #[[ATTR_NUM_ALL]] = {{.*}} "zero-call-used-regs"="all" + +// -fzero-call-used-regs=used-gpr: 
+// +// CHECK-USED-GPR: define {{.*}} @no_attribute({{.*}} #[[ATTR_NUM_USED_GPR:[0-9]*]] +// CHECK-USED-GPR: define {{.*}} @skip_test({{.*}} #[[ATTR_NUM_SKIP:[0-9]*]] +// CHECK-USED-GPR: define {{.*}} @used_gpr_arg_test({{.*}} #[[ATTR_NUM_USED_GPR_ARG:[0-9]*]] +// CHECK-USED-GPR: define {{.*}} @used_gpr_test({{.*}} #[[ATTR_NUM_USED_GPR]] +// CHECK-USED-GPR: define {{.*}} @used_arg_test({{.*}} #[[ATTR_NUM_USED_ARG:[0-9]*]] +// CHECK-USED-GPR: define {{.*}} @used_test({{.*}} #[[ATTR_NUM_USED:[0-9]*]] +// CHECK-USED-GPR: define {{.*}} @all_gpr_arg_test({{.*}} #[[ATTR_NUM_ALL_GPR_ARG:[0-9]*]] +// CHECK-USED-GPR: define {{.*}} @all_gpr_test({{.*}} #[[ATTR_NUM_ALL_GPR:[0-9]*]] +// CHECK-USED-GPR: define {{.*}} @all_arg_test({{.*}} #[[ATTR_NUM_ALL_ARG:[0-9]*]] +// CHECK-USED-GPR: define {{.*}} @all_test({{.*}} #[[ATTR_NUM_ALL:[0-9]*]] +// +// CHECK-USED-GPR: attributes #[[ATTR_NUM_USED_GPR]] = {{.*}} "zero-call-used-regs"="used-gpr" +// CHECK-USED-GPR: attributes #[[ATTR_NUM_SKIP]] = {{.*}} "zero-call-used-regs"="skip" +// CHECK-USED-GPR: attributes #[[ATTR_NUM_USED_GPR_ARG]] = {{.*}} "zero-call-used-regs"="used-gpr-arg" +// CHECK-USED-GPR: attributes #[[ATTR_NUM_USED_ARG]] = {{.*}} "zero-call-used-regs"="used-arg" +// CHECK-USED-GPR: attributes #[[ATTR_NUM_USED]] = {{.*}} "zero-call-used-regs"="used" +// CHECK-USED-GPR: attributes #[[ATTR_NUM_ALL_GPR_ARG]] = {{.*}} "zero-call-used-regs"="all-gpr-arg" +// CHECK-USED-GPR: attributes #[[ATTR_NUM_ALL_GPR]] = {{.*}} "zero-call-used-regs"="all-gpr" +// CHECK-USED-GPR: attributes #[[ATTR_NUM_ALL_ARG]] = {{.*}} "zero-call-used-regs"="all-arg" +// CHECK-USED-GPR: attributes #[[ATTR_NUM_ALL]] = {{.*}} "zero-call-used-regs"="all" + +// -fzero-call-used-regs=used-arg: +// +// CHECK-USED-ARG: define {{.*}} @no_attribute({{.*}} #[[ATTR_NUM_USED_ARG:[0-9]*]] +// CHECK-USED-ARG: define {{.*}} @skip_test({{.*}} #[[ATTR_NUM_SKIP:[0-9]*]] +// CHECK-USED-ARG: define {{.*}} @used_gpr_arg_test({{.*}} #[[ATTR_NUM_USED_GPR_ARG:[0-9]*]] +// 
CHECK-USED-ARG: define {{.*}} @used_gpr_test({{.*}} #[[ATTR_NUM_USED_GPR:[0-9]*]] +// CHECK-USED-ARG: define {{.*}} @used_arg_test({{.*}} #[[ATTR_NUM_USED_ARG]] +// CHECK-USED-ARG: define {{.*}} @used_test({{.*}} #[[ATTR_NUM_USED:[0-9]*]] +// CHECK-USED-ARG: define {{.*}} @all_gpr_arg_test({{.*}} #[[ATTR_NUM_ALL_GPR_ARG:[0-9]*]] +// CHECK-USED-ARG: define {{.*}} @all_gpr_test({{.*}} #[[ATTR_NUM_ALL_GPR:[0-9]*]] +// CHECK-USED-ARG: define {{.*}} @all_arg_test({{.*}} #[[ATTR_NUM_ALL_ARG:[0-9]*]] +// CHECK-USED-ARG: define {{.*}} @all_test({{.*}} #[[ATTR_NUM_ALL:[0-9]*]] +// +// CHECK-USED-ARG: attributes #[[ATTR_NUM_USED_ARG]] = {{.*}} "zero-call-used-regs"="used-arg" +// CHECK-USED-ARG: attributes #[[ATTR_NUM_SKIP]] = {{.*}} "zero-call-used-regs"="skip" +// CHECK-USED-ARG: attributes #[[ATTR_NUM_USED_GPR_ARG]] = {{.*}} "zero-call-used-regs"="used-gpr-arg" +// CHECK-USED-ARG: attributes #[[ATTR_NUM_USED_GPR]] = {{.*}} "zero-call-used-regs"="used-gpr" +// CHECK-USED-ARG: attributes #[[ATTR_NUM_USED]] = {{.*}} "zero-call-used-regs"="used" +// CHECK-USED-ARG: attributes #[[ATTR_NUM_ALL_GPR_ARG]] = {{.*}} "zero-call-used-regs"="all-gpr-arg" +// CHECK-USED-ARG: attributes #[[ATTR_NUM_ALL_GPR]] = {{.*}} "zero-call-used-regs"="all-gpr" +// CHECK-USED-ARG: attributes #[[ATTR_NUM_ALL_ARG]] = {{.*}} "zero-call-used-regs"="all-arg" +// CHECK-USED-ARG: attributes #[[ATTR_NUM_ALL]] = {{.*}} "zero-call-used-regs"="all" + +// -fzero-call-used-regs=used: +// +// CHECK-USED: define {{.*}} @no_attribute({{.*}} #[[ATTR_NUM_USED:[0-9]*]] +// CHECK-USED: define {{.*}} @skip_test({{.*}} #[[ATTR_NUM_SKIP:[0-9]*]] +// CHECK-USED: define {{.*}} @used_gpr_arg_test({{.*}} #[[ATTR_NUM_USED_GPR_ARG:[0-9]*]] +// CHECK-USED: define {{.*}} @used_gpr_test({{.*}} #[[ATTR_NUM_USED_GPR:[0-9]*]] +// CHECK-USED: define {{.*}} @used_arg_test({{.*}} #[[ATTR_NUM_USED_ARG:[0-9]*]] +// CHECK-USED: define {{.*}} @used_test({{.*}} #[[ATTR_NUM_USED]] +// CHECK-USED: define {{.*}} @all_gpr_arg_test({{.*}} 
#[[ATTR_NUM_ALL_GPR_ARG:[0-9]*]] +// CHECK-USED: define {{.*}} @all_gpr_test({{.*}} #[[ATTR_NUM_ALL_GPR:[0-9]*]] +// CHECK-USED: define {{.*}} @all_arg_test({{.*}} #[[ATTR_NUM_ALL_ARG:[0-9]*]] +// CHECK-USED: define {{.*}} @all_test({{.*}} #[[ATTR_NUM_ALL:[0-9]*]] +// +// CHECK-USED: attributes #[[ATTR_NUM_USED]] = {{.*}} "zero-call-used-regs"="used" +// CHECK-USED: attributes #[[ATTR_NUM_SKIP]] = {{.*}} "zero-call-used-regs"="skip" +// CHECK-USED: attributes #[[ATTR_NUM_USED_GPR_ARG]] = {{.*}} "zero-call-used-regs"="used-gpr-arg" +// CHECK-USED: attributes #[[ATTR_NUM_USED_GPR]] = {{.*}} "zero-call-used-regs"="used-gpr" +// CHECK-USED: attributes #[[ATTR_NUM_USED_ARG]] = {{.*}} "zero-call-used-regs"="used-arg" +// CHECK-USED: attributes #[[ATTR_NUM_ALL_GPR_ARG]] = {{.*}} "zero-call-used-regs"="all-gpr-arg" +// CHECK-USED: attributes #[[ATTR_NUM_ALL_GPR]] = {{.*}} "zero-call-used-regs"="all-gpr" +// CHECK-USED: attributes #[[ATTR_NUM_ALL_ARG]] = {{.*}} "zero-call-used-regs"="all-arg" +// CHECK-USED: attributes #[[ATTR_NUM_ALL]] = {{.*}} "zero-call-used-regs"="all" + +// -fzero-call-used-regs=all-gpr-arg: +// +// CHECK-ALL-GPR-ARG: define {{.*}} @no_attribute({{.*}} #[[ATTR_NUM_ALL_GPR_ARG:[0-9]*]] +// CHECK-ALL-GPR-ARG: define {{.*}} @skip_test({{.*}} #[[ATTR_NUM_SKIP:[0-9]*]] +// CHECK-ALL-GPR-ARG: define {{.*}} @used_gpr_arg_test({{.*}} #[[ATTR_NUM_USED_GPR_ARG:[0-9]*]] +// CHECK-ALL-GPR-ARG: define {{.*}} @used_gpr_test({{.*}} #[[ATTR_NUM_USED_GPR:[0-9]*]] +// CHECK-ALL-GPR-ARG: define {{.*}} @used_arg_test({{.*}} #[[ATTR_NUM_USED_ARG:[0-9]*]] +// CHECK-ALL-GPR-ARG: define {{.*}} @used_test({{.*}} #[[ATTR_NUM_USED:[0-9]*]] +// CHECK-ALL-GPR-ARG: define {{.*}} @all_gpr_arg_test({{.*}} #[[ATTR_NUM_ALL_GPR_ARG]] +// CHECK-ALL-GPR-ARG: define {{.*}} @all_gpr_test({{.*}} #[[ATTR_NUM_ALL_GPR:[0-9]*]] +// CHECK-ALL-GPR-ARG: define {{.*}} @all_arg_test({{.*}} #[[ATTR_NUM_ALL_ARG:[0-9]*]] +// CHECK-ALL-GPR-ARG: define {{.*}} @all_test({{.*}} #[[ATTR_NUM_ALL:[0-9]*]] +// 
+// CHECK-ALL-GPR-ARG: attributes #[[ATTR_NUM_ALL_GPR_ARG]] = {{.*}} "zero-call-used-regs"="all-gpr-arg" +// CHECK-ALL-GPR-ARG: attributes #[[ATTR_NUM_SKIP]] = {{.*}} "zero-call-used-regs"="skip" +// CHECK-ALL-GPR-ARG: attributes #[[ATTR_NUM_USED_GPR_ARG]] = {{.*}} "zero-call-used-regs"="used-gpr-arg" +// CHECK-ALL-GPR-ARG: attributes #[[ATTR_NUM_USED_GPR]] = {{.*}} "zero-call-used-regs"="used-gpr" +// CHECK-ALL-GPR-ARG: attributes #[[ATTR_NUM_USED_ARG]] = {{.*}} "zero-call-used-regs"="used-arg" +// CHECK-ALL-GPR-ARG: attributes #[[ATTR_NUM_USED]] = {{.*}} "zero-call-used-regs"="used" +// CHECK-ALL-GPR-ARG: attributes #[[ATTR_NUM_ALL_GPR]] = {{.*}} "zero-call-used-regs"="all-gpr" +// CHECK-ALL-GPR-ARG: attributes #[[ATTR_NUM_ALL_ARG]] = {{.*}} "zero-call-used-regs"="all-arg" +// CHECK-ALL-GPR-ARG: attributes #[[ATTR_NUM_ALL]] = {{.*}} "zero-call-used-regs"="all" + +// -fzero-call-used-regs=all-gpr: +// +// CHECK-ALL-GPR: define {{.*}} @no_attribute({{.*}} #[[ATTR_NUM_ALL_GPR:[0-9]*]] +// CHECK-ALL-GPR: define {{.*}} @skip_test({{.*}} #[[ATTR_NUM_SKIP:[0-9]*]] +// CHECK-ALL-GPR: define {{.*}} @used_gpr_arg_test({{.*}} #[[ATTR_NUM_USED_GPR_ARG:[0-9]*]] +// CHECK-ALL-GPR: define {{.*}} @used_gpr_test({{.*}} #[[ATTR_NUM_USED_GPR:[0-9]*]] +// CHECK-ALL-GPR: define {{.*}} @used_arg_test({{.*}} #[[ATTR_NUM_USED_ARG:[0-9]*]] +// CHECK-ALL-GPR: define {{.*}} @used_test({{.*}} #[[ATTR_NUM_USED:[0-9]*]] +// CHECK-ALL-GPR: define {{.*}} @all_gpr_arg_test({{.*}} #[[ATTR_NUM_ALL_GPR_ARG:[0-9]*]] +// CHECK-ALL-GPR: define {{.*}} @all_gpr_test({{.*}} #[[ATTR_NUM_ALL_GPR]] +// CHECK-ALL-GPR: define {{.*}} @all_arg_test({{.*}} #[[ATTR_NUM_ALL_ARG:[0-9]*]] +// CHECK-ALL-GPR: define {{.*}} @all_test({{.*}} #[[ATTR_NUM_ALL:[0-9]*]] +// +// CHECK-ALL-GPR: attributes #[[ATTR_NUM_ALL_GPR]] = {{.*}} "zero-call-used-regs"="all-gpr" +// CHECK-ALL-GPR: attributes #[[ATTR_NUM_SKIP]] = {{.*}} "zero-call-used-regs"="skip" +// CHECK-ALL-GPR: attributes #[[ATTR_NUM_USED_GPR_ARG]] = {{.*}} 
"zero-call-used-regs"="used-gpr-arg" +// CHECK-ALL-GPR: attributes #[[ATTR_NUM_USED_GPR]] = {{.*}} "zero-call-used-regs"="used-gpr" +// CHECK-ALL-GPR: attributes #[[ATTR_NUM_USED_ARG]] = {{.*}} "zero-call-used-regs"="used-arg" +// CHECK-ALL-GPR: attributes #[[ATTR_NUM_USED]] = {{.*}} "zero-call-used-regs"="used" +// CHECK-ALL-GPR: attributes #[[ATTR_NUM_ALL_GPR_ARG]] = {{.*}} "zero-call-used-regs"="all-gpr-arg" +// CHECK-ALL-GPR: attributes #[[ATTR_NUM_ALL_ARG]] = {{.*}} "zero-call-used-regs"="all-arg" +// CHECK-ALL-GPR: attributes #[[ATTR_NUM_ALL]] = {{.*}} "zero-call-used-regs"="all" + +// -fzero-call-used-regs=all-arg: +// +// CHECK-ALL-ARG: define {{.*}} @no_attribute({{.*}} #[[ATTR_NUM_ALL_ARG:[0-9]*]] +// CHECK-ALL-ARG: define {{.*}} @skip_test({{.*}} #[[ATTR_NUM_SKIP:[0-9]*]] +// CHECK-ALL-ARG: define {{.*}} @used_gpr_arg_test({{.*}} #[[ATTR_NUM_USED_GPR_ARG:[0-9]*]] +// CHECK-ALL-ARG: define {{.*}} @used_gpr_test({{.*}} #[[ATTR_NUM_USED_GPR:[0-9]*]] +// CHECK-ALL-ARG: define {{.*}} @used_arg_test({{.*}} #[[ATTR_NUM_USED_ARG:[0-9]*]] +// CHECK-ALL-ARG: define {{.*}} @used_test({{.*}} #[[ATTR_NUM_USED:[0-9]*]] +// CHECK-ALL-ARG: define {{.*}} @all_gpr_arg_test({{.*}} #[[ATTR_NUM_ALL_GPR_ARG:[0-9]*]] +// CHECK-ALL-ARG: define {{.*}} @all_gpr_test({{.*}} #[[ATTR_NUM_ALL_GPR:[0-9]*]] +// CHECK-ALL-ARG: define {{.*}} @all_arg_test({{.*}} #[[ATTR_NUM_ALL_ARG]] +// CHECK-ALL-ARG: define {{.*}} @all_test({{.*}} #[[ATTR_NUM_ALL:[0-9]*]] +// +// CHECK-ALL-ARG: attributes #[[ATTR_NUM_ALL_ARG]] = {{.*}} "zero-call-used-regs"="all-arg" +// CHECK-ALL-ARG: attributes #[[ATTR_NUM_SKIP]] = {{.*}} "zero-call-used-regs"="skip" +// CHECK-ALL-ARG: attributes #[[ATTR_NUM_USED_GPR_ARG]] = {{.*}} "zero-call-used-regs"="used-gpr-arg" +// CHECK-ALL-ARG: attributes #[[ATTR_NUM_USED_GPR]] = {{.*}} "zero-call-used-regs"="used-gpr" +// CHECK-ALL-ARG: attributes #[[ATTR_NUM_USED_ARG]] = {{.*}} "zero-call-used-regs"="used-arg" +// CHECK-ALL-ARG: attributes #[[ATTR_NUM_USED]] = {{.*}} 
"zero-call-used-regs"="used" +// CHECK-ALL-ARG: attributes #[[ATTR_NUM_ALL_GPR_ARG]] = {{.*}} "zero-call-used-regs"="all-gpr-arg" +// CHECK-ALL-ARG: attributes #[[ATTR_NUM_ALL_GPR]] = {{.*}} "zero-call-used-regs"="all-gpr" +// CHECK-ALL-ARG: attributes #[[ATTR_NUM_ALL]] = {{.*}} "zero-call-used-regs"="all" + +// -fzero-call-used-regs=all: +// +// CHECK-ALL: define {{.*}} @no_attribute({{.*}} #[[ATTR_NUM_ALL:[0-9]*]] +// CHECK-ALL: define {{.*}} @skip_test({{.*}} #[[ATTR_NUM_SKIP:[0-9]*]] +// CHECK-ALL: define {{.*}} @used_gpr_arg_test({{.*}} #[[ATTR_NUM_USED_GPR_ARG:[0-9]*]] +// CHECK-ALL: define {{.*}} @used_gpr_test({{.*}} #[[ATTR_NUM_USED_GPR:[0-9]*]] +// CHECK-ALL: define {{.*}} @used_arg_test({{.*}} #[[ATTR_NUM_USED_ARG:[0-9]*]] +// CHECK-ALL: define {{.*}} @used_test({{.*}} #[[ATTR_NUM_USED:[0-9]*]] +// CHECK-ALL: define {{.*}} @all_gpr_arg_test({{.*}} #[[ATTR_NUM_ALL_GPR_ARG:[0-9]*]] +// CHECK-ALL: define {{.*}} @all_gpr_test({{.*}} #[[ATTR_NUM_ALL_GPR:[0-9]*]] +// CHECK-ALL: define {{.*}} @all_arg_test({{.*}} #[[ATTR_NUM_ALL_ARG:[0-9]*]] +// CHECK-ALL: define {{.*}} @all_test({{.*}} #[[ATTR_NUM_ALL]] +// +// CHECK-ALL: attributes #[[ATTR_NUM_ALL]] = {{.*}} "zero-call-used-regs"="all" +// CHECK-ALL: attributes #[[ATTR_NUM_SKIP]] = {{.*}} "zero-call-used-regs"="skip" +// CHECK-ALL: attributes #[[ATTR_NUM_USED_GPR_ARG]] = {{.*}} "zero-call-used-regs"="used-gpr-arg" +// CHECK-ALL: attributes #[[ATTR_NUM_USED_GPR]] = {{.*}} "zero-call-used-regs"="used-gpr" +// CHECK-ALL: attributes #[[ATTR_NUM_USED_ARG]] = {{.*}} "zero-call-used-regs"="used-arg" +// CHECK-ALL: attributes #[[ATTR_NUM_USED]] = {{.*}} "zero-call-used-regs"="used" +// CHECK-ALL: attributes #[[ATTR_NUM_ALL_GPR_ARG]] = {{.*}} "zero-call-used-regs"="all-gpr-arg" +// CHECK-ALL: attributes #[[ATTR_NUM_ALL_GPR]] = {{.*}} "zero-call-used-regs"="all-gpr" +// CHECK-ALL: attributes #[[ATTR_NUM_ALL_ARG]] = {{.*}} "zero-call-used-regs"="all-arg" + +#define __zero_call_used_regs(kind) 
__attribute__((zero_call_used_regs(kind))) + +void no_attribute() { +} + +void __zero_call_used_regs("skip") skip_test() { +} + +void __zero_call_used_regs("used-gpr-arg") used_gpr_arg_test() { +} + +void __zero_call_used_regs("used-gpr") used_gpr_test() { +} + +void __zero_call_used_regs("used-arg") used_arg_test() { +} + +void __zero_call_used_regs("used") used_test() { +} + +void __zero_call_used_regs("all-gpr-arg") all_gpr_arg_test() { +} + +void __zero_call_used_regs("all-gpr") all_gpr_test() { +} + +void __zero_call_used_regs("all-arg") all_arg_test() { +} + +void __zero_call_used_regs("all") all_test() { +} diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -192,4 +192,5 @@ // CHECK-NEXT: WorkGroupSizeHint (SubjectMatchRule_function) // CHECK-NEXT: XRayInstrument (SubjectMatchRule_function, SubjectMatchRule_objc_method) // CHECK-NEXT: XRayLogArgs (SubjectMatchRule_function, SubjectMatchRule_objc_method) +// CHECK-NEXT: ZeroCallUsedRegs (SubjectMatchRule_function) // CHECK-NEXT: End of supported attributes. diff --git a/clang/test/Sema/zero_call_used_regs.c b/clang/test/Sema/zero_call_used_regs.c new file mode 100644 --- /dev/null +++ b/clang/test/Sema/zero_call_used_regs.c @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 %s -verify -fno-builtin + +#define _zero_call_used_regs(...) 
__attribute__((zero_call_used_regs(__VA_ARGS__))) + +void failure() _zero_call_used_regs(); // expected-error {{takes one argument}} +void failure() _zero_call_used_regs("used", "used-gpr"); // expected-error {{takes one argument}} +void failure() _zero_call_used_regs(0); // expected-error {{requires a string}} +void failure() _zero_call_used_regs("hello"); // expected-warning {{argument not supported: hello}} + +void success() _zero_call_used_regs("skip"); +void success() _zero_call_used_regs("used-gpr-arg"); +void success() _zero_call_used_regs("used-gpr"); +void success() _zero_call_used_regs("used-arg"); +void success() _zero_call_used_regs("used"); +void success() _zero_call_used_regs("all-gpr-arg"); +void success() _zero_call_used_regs("all-gpr"); +void success() _zero_call_used_regs("all-arg"); +void success() _zero_call_used_regs("all"); diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -229,6 +229,16 @@ /// Returns true if the updated CSR list was initialized and false otherwise. bool isUpdatedCSRsInitialized() const { return IsUpdatedCSRsInitialized; } + /// Returns true if a register can be used as an argument to a function. + bool isArgumentRegister(const MachineFunction &MF, MCRegister Reg) const; + + /// Returns true if a register is a fixed register. + bool isFixedRegister(const MachineFunction &MF, MCRegister Reg) const; + + /// Returns true if a register is a general purpose register. + bool isGeneralPurposeRegister(const MachineFunction &MF, + MCRegister Reg) const; + /// Disables the register from the list of CSRs. /// I.e. the register will not appear as part of the CSR mask. /// \see UpdatedCalleeSavedRegs. 
diff --git a/llvm/include/llvm/CodeGen/TargetFrameLowering.h b/llvm/include/llvm/CodeGen/TargetFrameLowering.h --- a/llvm/include/llvm/CodeGen/TargetFrameLowering.h +++ b/llvm/include/llvm/CodeGen/TargetFrameLowering.h @@ -213,6 +213,10 @@ virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const = 0; + /// emitZeroCallUsedRegs - Zeros out call used registers. + virtual void emitZeroCallUsedRegs(BitVector RegsToZero, + MachineBasicBlock &MBB) const {} + /// With basic block sections, emit callee saved frame moves for basic blocks /// that are in a different section. virtual void diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h --- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h @@ -567,6 +567,24 @@ virtual bool isCalleeSavedPhysReg(MCRegister PhysReg, const MachineFunction &MF) const; + /// Returns true if PhysReg can be used as an argument to a function. + virtual bool isArgumentRegister(const MachineFunction &MF, + MCRegister PhysReg) const { + return false; + } + + /// Returns true if PhysReg is a fixed register. + virtual bool isFixedRegister(const MachineFunction &MF, + MCRegister PhysReg) const { + return false; + } + + /// Returns true if PhysReg is a general purpose register. + virtual bool isGeneralPurposeRegister(const MachineFunction &MF, + MCRegister PhysReg) const { + return false; + } + /// Prior to adding the live-out mask to a stackmap or patchpoint /// instruction, provide the target the opportunity to adjust it (mainly to /// remove pseudo-registers that should be ignored). diff --git a/llvm/include/llvm/Support/CodeGen.h b/llvm/include/llvm/Support/CodeGen.h --- a/llvm/include/llvm/Support/CodeGen.h +++ b/llvm/include/llvm/Support/CodeGen.h @@ -69,6 +69,34 @@ // Specify what functions should keep the frame pointer. 
enum class FramePointerKind { None, NonLeaf, All }; + // Specify which call-used registers to zero. + namespace ZeroCallUsedRegs { + const unsigned ONLY_USED = 1U << 1; + const unsigned ONLY_GPR = 1U << 2; + const unsigned ONLY_ARG = 1U << 3; + + enum class ZeroCallUsedRegsKind : unsigned int { + // Don't zero any call-used regs. + Skip = 1U << 0, + // Only zeros call-used GPRs used in the fn and pass args. + UsedGPRArg = ONLY_USED | ONLY_GPR | ONLY_ARG, + // Only zeros call-used GPRs used in the fn. + UsedGPR = ONLY_USED | ONLY_GPR, + // Only zeros call-used regs used in the fn and pass args. + UsedArg = ONLY_USED | ONLY_ARG, + // Only zeros call-used regs used in the fn. + Used = ONLY_USED, + // Zeros all call-used GPRs that pass args. + AllGPRArg = ONLY_GPR | ONLY_ARG, + // Zeros all call-used GPRs. + AllGPR = ONLY_GPR, + // Zeros all call-used regs that pass args. + AllArg = ONLY_ARG, + // Zeros all call-used regs. + All = 0, + }; + } // namespace ZeroCallUsedRegs + } // end llvm namespace #endif diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -389,6 +389,14 @@ list RegAsmNames = RegNames; } +// RegisterCategory - This class is a list of RegisterClasses that belong to a +// general category --- e.g. "general purpose" or "fixed" registers. This is +// useful for identifying registers in a generic way instead of having +// information about a specific target's registers. +class RegisterCategory classes> { + // Classes - A list of register classes that fall within the category. 
+ list Classes = classes; +} //===----------------------------------------------------------------------===// // DwarfRegNum - This class provides a mapping of the llvm register enumeration diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -651,3 +651,18 @@ } return false; } + +bool MachineRegisterInfo::isArgumentRegister(const MachineFunction &MF, + MCRegister Reg) const { + return getTargetRegisterInfo()->isArgumentRegister(MF, Reg); +} + +bool MachineRegisterInfo::isFixedRegister(const MachineFunction &MF, + MCRegister Reg) const { + return getTargetRegisterInfo()->isFixedRegister(MF, Reg); +} + +bool MachineRegisterInfo::isGeneralPurposeRegister(const MachineFunction &MF, + MCRegister Reg) const { + return getTargetRegisterInfo()->isGeneralPurposeRegister(MF, Reg); +} diff --git a/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/llvm/lib/CodeGen/PrologEpilogInserter.cpp --- a/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -130,6 +130,7 @@ void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF, int &SPAdj); void insertPrologEpilogCode(MachineFunction &MF); + void insertZeroCallUsedRegs(MachineFunction &MF); }; } // end anonymous namespace @@ -1145,6 +1146,9 @@ for (MachineBasicBlock *RestoreBlock : RestoreBlocks) TFI.emitEpilogue(MF, *RestoreBlock); + // Zero call used registers before restoring callee-saved registers. + insertZeroCallUsedRegs(MF); + for (MachineBasicBlock *SaveBlock : SaveBlocks) TFI.inlineStackProbe(MF, *SaveBlock); @@ -1171,6 +1175,95 @@ TFI.adjustForHiPEPrologue(MF, *SaveBlock); } +/// insertZeroCallUsedRegs - Zero out call used registers. 
+void PEI::insertZeroCallUsedRegs(MachineFunction &MF) { + const Function &F = MF.getFunction(); + + if (!F.hasFnAttribute("zero-call-used-regs")) + return; + + using namespace ZeroCallUsedRegs; + + ZeroCallUsedRegsKind ZeroRegsKind = + StringSwitch( + F.getFnAttribute("zero-call-used-regs").getValueAsString()) + .Case("skip", ZeroCallUsedRegsKind::Skip) + .Case("used-gpr-arg", ZeroCallUsedRegsKind::UsedGPRArg) + .Case("used-gpr", ZeroCallUsedRegsKind::UsedGPR) + .Case("used-arg", ZeroCallUsedRegsKind::UsedArg) + .Case("used", ZeroCallUsedRegsKind::Used) + .Case("all-gpr-arg", ZeroCallUsedRegsKind::AllGPRArg) + .Case("all-gpr", ZeroCallUsedRegsKind::AllGPR) + .Case("all-arg", ZeroCallUsedRegsKind::AllArg) + .Case("all", ZeroCallUsedRegsKind::All); + + if (ZeroRegsKind == ZeroCallUsedRegsKind::Skip) + return; + + const bool OnlyGPR = static_cast(ZeroRegsKind) & ONLY_GPR; + const bool OnlyUsed = static_cast(ZeroRegsKind) & ONLY_USED; + const bool OnlyArg = static_cast(ZeroRegsKind) & ONLY_ARG; + + const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo(); + const BitVector AllocatableSet(TRI.getAllocatableSet(MF)); + + // Mark all used registers. + BitVector UsedRegs(TRI.getNumRegs()); + if (OnlyUsed) + for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB) + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + + MCRegister Reg = MO.getReg(); + if (AllocatableSet[Reg] && !MO.isImplicit() && + (MO.isDef() || MO.isUse())) + UsedRegs.set(Reg); + } + + BitVector RegsToZero(TRI.getNumRegs()); + for (MCRegister Reg : AllocatableSet.set_bits()) { + // Skip over fixed registers. + if (TRI.isFixedRegister(MF, Reg)) + continue; + + // Want only general purpose registers. + if (OnlyGPR && !TRI.isGeneralPurposeRegister(MF, Reg)) + continue; + + // Want only used registers. + if (OnlyUsed && !UsedRegs[Reg]) + continue; + + // Want only registers used for arguments. 
+ if (OnlyArg && !TRI.isArgumentRegister(MF, Reg)) + continue; + + RegsToZero.set(Reg); + } + + // Remove registers that are live when leaving the function. + for (const MachineBasicBlock &MBB : MF) + for (const MachineInstr &MI : MBB.terminators()) { + if (!MI.isReturn()) + continue; + + for (const auto &MO : MI.operands()) { + if (!MO.isReg()) + continue; + + for (MCPhysReg SReg : TRI.sub_and_superregs_inclusive(MO.getReg())) + RegsToZero.reset(SReg); + } + } + + const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering(); + for (MachineBasicBlock &MBB : MF) + if (MBB.isReturnBlock()) + TFI.emitZeroCallUsedRegs(RegsToZero, MBB); +} + /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical /// register references and actual offsets. void PEI::replaceFrameIndices(MachineFunction &MF) { diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h --- a/llvm/lib/Target/X86/X86FrameLowering.h +++ b/llvm/lib/Target/X86/X86FrameLowering.h @@ -233,6 +233,10 @@ const DebugLoc &DL, uint64_t Offset, uint64_t Align) const; + /// Emit target zero call-used regs. + void emitZeroCallUsedRegs(BitVector RegsToZero, + MachineBasicBlock &MBB) const override; + void adjustFrameForMsvcCxxEh(MachineFunction &MF) const; /// Aligns the stack pointer by ANDing it with -MaxAlign. diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp --- a/llvm/lib/Target/X86/X86FrameLowering.cpp +++ b/llvm/lib/Target/X86/X86FrameLowering.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "X86FrameLowering.h" +#include "MCTargetDesc/X86MCTargetDesc.h" #include "X86InstrBuilder.h" #include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" @@ -492,6 +493,123 @@ } } +static MachineBasicBlock::iterator +skipCalleeSavedPopInstrs(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + // Skip the callee-saved pop instructions. 
+ MachineBasicBlock::iterator FirstCSPop = MBBI; + while (MBBI != MBB.begin()) { + MachineBasicBlock::iterator PI = std::prev(MBBI); + unsigned Opc = PI->getOpcode(); + + if (Opc != X86::DBG_VALUE && !PI->isTerminator() && + !PI->isCFIInstruction()) { + if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) && + (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) && + (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) && + (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy))) + break; + FirstCSPop = PI; + } + + --MBBI; + } + return FirstCSPop; +} + +void X86FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero, + MachineBasicBlock &MBB) const { + const MachineFunction &MF = *MBB.getParent(); + + // Don't clear registers that will just be reset before exiting. + for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) + for (MCRegister Reg : TRI->sub_and_superregs_inclusive(CSI.getReg())) + RegsToZero.reset(Reg); + + // Insertion point. + MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); + MBBI = skipCalleeSavedPopInstrs(MBB, MBBI); + + // We don't need to zero out registers that are clobbered by "pop" + // instructions. + for (MachineBasicBlock::iterator I = MBBI, E = MBB.end(); I != E; ++I) + for (const MachineOperand &MO : I->operands()) { + if (!MO.isReg()) + continue; + + for (const MCPhysReg &Reg : TRI->sub_and_superregs_inclusive(MO.getReg())) + RegsToZero.reset(Reg); + } + + DebugLoc DL; + if (MBBI != MBB.end()) + DL = MBBI->getDebugLoc(); + + // Zero out FP stack if referenced. Do this outside of the loop below so that + // it's done only once. + const X86Subtarget &ST = MF.getSubtarget(); + for (MCRegister Reg : RegsToZero.set_bits()) { + if (!X86::RFP80RegClass.contains(Reg)) + continue; + + unsigned NumFPRegs = ST.is64Bit() ? 
8 : 7; + for (unsigned i = 0; i != NumFPRegs; ++i) + BuildMI(MBB, MBBI, DL, TII.get(X86::LD_F0)); + + for (unsigned i = 0; i != NumFPRegs; ++i) + BuildMI(MBB, MBBI, DL, TII.get(X86::ST_FPrr)).addReg(X86::ST0); + break; + } + + // For GPRs, we only care to clear out the 32-bit register. + for (MCRegister Reg : RegsToZero.set_bits()) + if (TRI->isGeneralPurposeRegister(MF, Reg)) { + Reg = getX86SubSuperRegisterOrZero(Reg, 32); + for (const MCPhysReg &Reg : TRI->sub_and_superregs_inclusive(Reg)) + RegsToZero.reset(Reg); + RegsToZero.set(Reg); + } + + // Zero out registers. + for (MCRegister Reg : RegsToZero.set_bits()) { + if (ST.hasMMX() && X86::VR64RegClass.contains(Reg)) + // FIXME: Ignore MMX registers? + continue; + + unsigned XorOp; + if (TRI->isGeneralPurposeRegister(MF, Reg)) { + XorOp = X86::XOR32rr; + } else if (X86::VR128RegClass.contains(Reg)) { + // XMM# + if (!ST.hasSSE1()) + continue; + XorOp = X86::PXORrr; + } else if (X86::VR256RegClass.contains(Reg)) { + // YMM# + if (!ST.hasAVX()) + continue; + XorOp = X86::VPXORrr; + } else if (X86::VR512RegClass.contains(Reg)) { + // ZMM# + if (!ST.hasAVX512()) + continue; + XorOp = X86::VPXORYrr; + } else if (X86::VK1RegClass.contains(Reg) || + X86::VK2RegClass.contains(Reg) || + X86::VK4RegClass.contains(Reg) || + X86::VK8RegClass.contains(Reg) || + X86::VK16RegClass.contains(Reg)) { + if (!ST.hasVLX()) + continue; + XorOp = ST.hasBWI() ? X86::KXORQrr : X86::KXORWrr; + } else { + continue; + } + + BuildMI(MBB, MBBI, DL, TII.get(XorOp), Reg).addReg(Reg).addReg(Reg); + } +} + void X86FrameLowering::emitStackProbe( MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog, @@ -2157,24 +2275,8 @@ } } + MBBI = skipCalleeSavedPopInstrs(MBB, MBBI); MachineBasicBlock::iterator FirstCSPop = MBBI; - // Skip the callee-saved pop instructions. 
- while (MBBI != MBB.begin()) { - MachineBasicBlock::iterator PI = std::prev(MBBI); - unsigned Opc = PI->getOpcode(); - - if (Opc != X86::DBG_VALUE && !PI->isTerminator()) { - if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) && - (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) && - (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) && - (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy))) - break; - FirstCSPop = PI; - } - - --MBBI; - } - MBBI = FirstCSPop; if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET) emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator); diff --git a/llvm/lib/Target/X86/X86RegisterInfo.h b/llvm/lib/Target/X86/X86RegisterInfo.h --- a/llvm/lib/Target/X86/X86RegisterInfo.h +++ b/llvm/lib/Target/X86/X86RegisterInfo.h @@ -115,6 +115,15 @@ /// register scavenger to determine what registers are free. BitVector getReservedRegs(const MachineFunction &MF) const override; + /// isArgumentReg - Returns true if Reg can be used as an argument to a + /// function. + bool isArgumentRegister(const MachineFunction &MF, + MCRegister Reg) const override; + + /// Returns true if PhysReg is a fixed register. 
+ bool isFixedRegister(const MachineFunction &MF, + MCRegister PhysReg) const override; + void adjustStackMapLiveOutMask(uint32_t *Mask) const override; bool hasBasePointer(const MachineFunction &MF) const; diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp --- a/llvm/lib/Target/X86/X86RegisterInfo.cpp +++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp @@ -618,6 +618,62 @@ return Reserved; } +bool X86RegisterInfo::isArgumentRegister(const MachineFunction &MF, + MCRegister Reg) const { + const X86Subtarget &ST = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *ST.getRegisterInfo(); + auto IsSubReg = [&](MCRegister RegA, MCRegister RegB) { + return TRI.isSuperOrSubRegisterEq(RegA, RegB); + }; + + if (!ST.is64Bit()) + return llvm::any_of( + SmallVector{X86::EAX, X86::ECX, X86::EDX}, + [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); }) || + (ST.hasMMX() && X86::VR64RegClass.contains(Reg)); + + CallingConv::ID CC = MF.getFunction().getCallingConv(); + + if (CC == CallingConv::X86_64_SysV && IsSubReg(X86::RAX, Reg)) + return true; + + if (llvm::any_of( + SmallVector{X86::RDX, X86::RCX, X86::R8, X86::R9}, + [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) + return true; + + if (CC != CallingConv::Win64 && + llvm::any_of(SmallVector{X86::RDI, X86::RSI}, + [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) + return true; + + if (ST.hasSSE1() && + llvm::any_of(SmallVector{X86::XMM0, X86::XMM1, X86::XMM2, + X86::XMM3, X86::XMM4, X86::XMM5, + X86::XMM6, X86::XMM7}, + [&](MCRegister &RegA) { return IsSubReg(RegA, Reg); })) + return true; + + return false; +} + +bool X86RegisterInfo::isFixedRegister(const MachineFunction &MF, + MCRegister PhysReg) const { + const X86Subtarget &ST = MF.getSubtarget(); + const TargetRegisterInfo &TRI = *ST.getRegisterInfo(); + + // Stack pointer. + if (TRI.isSuperOrSubRegisterEq(X86::RSP, PhysReg)) + return true; + + // Don't use the frame pointer if it's being used. 
+ const X86FrameLowering &TFI = *getFrameLowering(MF); + if (TFI.hasFP(MF) && TRI.isSuperOrSubRegisterEq(X86::RBP, PhysReg)) + return true; + + return X86GenRegisterInfo::isFixedRegister(MF, PhysReg); +} + void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const { // Check if the EFLAGS register is marked as live-out. This shouldn't happen, // because the calling convention defines the EFLAGS register as NOT diff --git a/llvm/lib/Target/X86/X86RegisterInfo.td b/llvm/lib/Target/X86/X86RegisterInfo.td --- a/llvm/lib/Target/X86/X86RegisterInfo.td +++ b/llvm/lib/Target/X86/X86RegisterInfo.td @@ -638,3 +638,14 @@ let CopyCost = -1 in // Don't allow copying of tile registers def TILE : RegisterClass<"X86", [x86amx], 8192, (sequence "TMM%u", 0, 7)> {let Size = 8192;} + +//===----------------------------------------------------------------------===// +// Register categories. +// + +// The TILE and VK*PAIR registers may not be "fixed", but we don't want them +// anyway. +def FixedRegisters : RegisterCategory<[DEBUG_REG, CONTROL_REG, CCR, FPCCR, + DFCCR, TILE, VK1PAIR, VK2PAIR, VK4PAIR, + VK8PAIR, VK16PAIR]>; +def GeneralPurposeRegisters : RegisterCategory<[GR64, GR32, GR16, GR8]>; diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs-fmod.ll b/llvm/test/CodeGen/X86/zero-call-used-regs-fmod.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/zero-call-used-regs-fmod.ll @@ -0,0 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s + +define dso_local float @foo() local_unnamed_addr #0 { +; CHECK-LABEL: foo: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: xorps %xmm0, %xmm0 +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: callq fmod +; CHECK-NEXT: cvtsd2ss %xmm0, %xmm0 +; CHECK-NEXT: xorps %xmm1, %xmm1 +; CHECK-NEXT: popq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: retq +entry: + 
%call = call nnan ninf double @fmod(double 0.000000e+00, double 0.000000e+00) #2 + %conv = fptrunc double %call to float + ret float %conv +} + +declare dso_local double @fmod(double, double) local_unnamed_addr #1 + +attributes #0 = { mustprogress nofree nounwind uwtable willreturn "frame-pointer"="none" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "zero-call-used-regs"="used" } +attributes #1 = { mustprogress nofree nounwind willreturn "frame-pointer"="none" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "zero-call-used-regs"="used" } +attributes #2 = { nounwind } diff --git a/llvm/test/CodeGen/X86/zero-call-used-regs.ll b/llvm/test/CodeGen/X86/zero-call-used-regs.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/zero-call-used-regs.ll @@ -0,0 +1,284 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s --check-prefix=I386 +; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu | FileCheck %s --check-prefix=X86-64 + +@result = dso_local global i32 0, align 4 + +define dso_local i32 @skip(i32 returned %x) local_unnamed_addr #0 "zero-call-used-regs"="skip" { +; I386-LABEL: skip: +; I386: # %bb.0: # %entry +; I386-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NEXT: retl +; +; X86-64-LABEL: skip: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: movl %edi, %eax +; X86-64-NEXT: retq + +entry: + ret i32 %x +} + +define dso_local i32 @used_gpr_arg(i32 returned %x) local_unnamed_addr #0 "zero-call-used-regs"="used-gpr-arg" { +; I386-LABEL: used_gpr_arg: +; I386: # %bb.0: # %entry +; I386-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NEXT: retl 
+; +; X86-64-LABEL: used_gpr_arg: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: movl %edi, %eax +; X86-64-NEXT: xorl %edi, %edi +; X86-64-NEXT: retq + +entry: + ret i32 %x +} + +define dso_local i32 @used_gpr(i32 returned %x) local_unnamed_addr #0 "zero-call-used-regs"="used-gpr" { +; I386-LABEL: used_gpr: +; I386: # %bb.0: # %entry +; I386-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NEXT: retl +; +; X86-64-LABEL: used_gpr: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: movl %edi, %eax +; X86-64-NEXT: xorl %edi, %edi +; X86-64-NEXT: retq + +entry: + ret i32 %x +} + +define dso_local i32 @used_arg(i32 returned %x) local_unnamed_addr #0 "zero-call-used-regs"="used-arg" { +; I386-LABEL: used_arg: +; I386: # %bb.0: # %entry +; I386-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NEXT: retl +; +; X86-64-LABEL: used_arg: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: movl %edi, %eax +; X86-64-NEXT: xorl %edi, %edi +; X86-64-NEXT: retq + +entry: + ret i32 %x +} + +define dso_local i32 @used(i32 returned %x) local_unnamed_addr #0 "zero-call-used-regs"="used" { +; I386-LABEL: used: +; I386: # %bb.0: # %entry +; I386-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NEXT: retl +; +; X86-64-LABEL: used: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: movl %edi, %eax +; X86-64-NEXT: xorl %edi, %edi +; X86-64-NEXT: retq + +entry: + ret i32 %x +} + +define dso_local i32 @all_gpr_arg(i32 returned %x) local_unnamed_addr #0 "zero-call-used-regs"="all-gpr-arg" { +; I386-LABEL: all_gpr_arg: +; I386: # %bb.0: # %entry +; I386-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NEXT: xorl %ecx, %ecx +; I386-NEXT: xorl %edx, %edx +; I386-NEXT: retl +; +; X86-64-LABEL: all_gpr_arg: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: movl %edi, %eax +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: xorl %edi, %edi +; X86-64-NEXT: xorl %edx, %edx +; X86-64-NEXT: xorl %esi, %esi +; X86-64-NEXT: xorl %r8d, %r8d +; X86-64-NEXT: xorl %r9d, %r9d +; X86-64-NEXT: retq + +entry: + ret i32 %x +} + +define dso_local i32 @all_gpr(i32 returned 
%x) local_unnamed_addr #0 "zero-call-used-regs"="all-gpr" { +; I386-LABEL: all_gpr: +; I386: # %bb.0: # %entry +; I386-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NEXT: xorl %ebp, %ebp +; I386-NEXT: xorl %ebx, %ebx +; I386-NEXT: xorl %ecx, %ecx +; I386-NEXT: xorl %edi, %edi +; I386-NEXT: xorl %edx, %edx +; I386-NEXT: xorl %esi, %esi +; I386-NEXT: retl +; +; X86-64-LABEL: all_gpr: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: movl %edi, %eax +; X86-64-NEXT: xorl %ebp, %ebp +; X86-64-NEXT: xorl %ebx, %ebx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: xorl %edi, %edi +; X86-64-NEXT: xorl %edx, %edx +; X86-64-NEXT: xorl %esi, %esi +; X86-64-NEXT: xorl %r8d, %r8d +; X86-64-NEXT: xorl %r9d, %r9d +; X86-64-NEXT: xorl %r10d, %r10d +; X86-64-NEXT: xorl %r11d, %r11d +; X86-64-NEXT: xorl %r12d, %r12d +; X86-64-NEXT: xorl %r13d, %r13d +; X86-64-NEXT: xorl %r14d, %r14d +; X86-64-NEXT: xorl %r15d, %r15d +; X86-64-NEXT: retq + +entry: + ret i32 %x +} + +define dso_local i32 @all_arg(i32 returned %x) local_unnamed_addr #0 "zero-call-used-regs"="all-arg" { +; I386-LABEL: all_arg: +; I386: # %bb.0: # %entry +; I386-NEXT: movl {{[0-9]+}}(%esp), %eax +; I386-NEXT: xorl %ecx, %ecx +; I386-NEXT: xorl %edx, %edx +; I386-NEXT: retl +; +; X86-64-LABEL: all_arg: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: movl %edi, %eax +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: xorl %edi, %edi +; X86-64-NEXT: xorl %edx, %edx +; X86-64-NEXT: xorl %esi, %esi +; X86-64-NEXT: xorps %xmm0, %xmm0 +; X86-64-NEXT: xorps %xmm1, %xmm1 +; X86-64-NEXT: xorps %xmm2, %xmm2 +; X86-64-NEXT: xorps %xmm3, %xmm3 +; X86-64-NEXT: xorps %xmm4, %xmm4 +; X86-64-NEXT: xorps %xmm5, %xmm5 +; X86-64-NEXT: xorps %xmm6, %xmm6 +; X86-64-NEXT: xorps %xmm7, %xmm7 +; X86-64-NEXT: xorl %r8d, %r8d +; X86-64-NEXT: xorl %r9d, %r9d +; X86-64-NEXT: retq + +entry: + ret i32 %x +} + +define dso_local i32 @all(i32 returned %x) local_unnamed_addr #0 "zero-call-used-regs"="all" { +; I386-LABEL: all: +; I386: # %bb.0: # %entry +; I386-NEXT: movl 
{{[0-9]+}}(%esp), %eax +; I386-NEXT: fldz +; I386-NEXT: fldz +; I386-NEXT: fldz +; I386-NEXT: fldz +; I386-NEXT: fldz +; I386-NEXT: fldz +; I386-NEXT: fldz +; I386-NEXT: fstp %st(0) +; I386-NEXT: fstp %st(0) +; I386-NEXT: fstp %st(0) +; I386-NEXT: fstp %st(0) +; I386-NEXT: fstp %st(0) +; I386-NEXT: fstp %st(0) +; I386-NEXT: fstp %st(0) +; I386-NEXT: xorl %ebp, %ebp +; I386-NEXT: xorl %ebx, %ebx +; I386-NEXT: xorl %ecx, %ecx +; I386-NEXT: xorl %edi, %edi +; I386-NEXT: xorl %edx, %edx +; I386-NEXT: xorl %esi, %esi +; I386-NEXT: xorps %xmm0, %xmm0 +; I386-NEXT: xorps %xmm1, %xmm1 +; I386-NEXT: xorps %xmm2, %xmm2 +; I386-NEXT: xorps %xmm3, %xmm3 +; I386-NEXT: xorps %xmm4, %xmm4 +; I386-NEXT: xorps %xmm5, %xmm5 +; I386-NEXT: xorps %xmm6, %xmm6 +; I386-NEXT: xorps %xmm7, %xmm7 +; I386-NEXT: retl +; +; X86-64-LABEL: all: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: movl %edi, %eax +; X86-64-NEXT: fldz +; X86-64-NEXT: fldz +; X86-64-NEXT: fldz +; X86-64-NEXT: fldz +; X86-64-NEXT: fldz +; X86-64-NEXT: fldz +; X86-64-NEXT: fldz +; X86-64-NEXT: fldz +; X86-64-NEXT: fstp %st(0) +; X86-64-NEXT: fstp %st(0) +; X86-64-NEXT: fstp %st(0) +; X86-64-NEXT: fstp %st(0) +; X86-64-NEXT: fstp %st(0) +; X86-64-NEXT: fstp %st(0) +; X86-64-NEXT: fstp %st(0) +; X86-64-NEXT: fstp %st(0) +; X86-64-NEXT: xorl %ebp, %ebp +; X86-64-NEXT: xorl %ebx, %ebx +; X86-64-NEXT: xorl %ecx, %ecx +; X86-64-NEXT: xorl %edi, %edi +; X86-64-NEXT: xorl %edx, %edx +; X86-64-NEXT: xorl %esi, %esi +; X86-64-NEXT: xorps %xmm0, %xmm0 +; X86-64-NEXT: xorps %xmm1, %xmm1 +; X86-64-NEXT: xorps %xmm2, %xmm2 +; X86-64-NEXT: xorps %xmm3, %xmm3 +; X86-64-NEXT: xorps %xmm4, %xmm4 +; X86-64-NEXT: xorps %xmm5, %xmm5 +; X86-64-NEXT: xorps %xmm6, %xmm6 +; X86-64-NEXT: xorps %xmm7, %xmm7 +; X86-64-NEXT: xorps %xmm8, %xmm8 +; X86-64-NEXT: xorps %xmm9, %xmm9 +; X86-64-NEXT: xorps %xmm10, %xmm10 +; X86-64-NEXT: xorps %xmm11, %xmm11 +; X86-64-NEXT: xorps %xmm12, %xmm12 +; X86-64-NEXT: xorps %xmm13, %xmm13 +; X86-64-NEXT: xorps %xmm14, 
%xmm14 +; X86-64-NEXT: xorps %xmm15, %xmm15 +; X86-64-NEXT: xorl %r8d, %r8d +; X86-64-NEXT: xorl %r9d, %r9d +; X86-64-NEXT: xorl %r10d, %r10d +; X86-64-NEXT: xorl %r11d, %r11d +; X86-64-NEXT: xorl %r12d, %r12d +; X86-64-NEXT: xorl %r13d, %r13d +; X86-64-NEXT: xorl %r14d, %r14d +; X86-64-NEXT: xorl %r15d, %r15d +; X86-64-NEXT: retq + +entry: + ret i32 %x +} + +; Don't emit zeroing registers in "main" function. +define dso_local i32 @main() local_unnamed_addr #1 { +; I386-LABEL: main: +; I386: # %bb.0: # %entry +; I386-NEXT: movl $2, result +; I386-NEXT: xorl %eax, %eax +; I386-NEXT: retl +; +; X86-64-LABEL: main: +; X86-64: # %bb.0: # %entry +; X86-64-NEXT: movl $2, result(%rip) +; X86-64-NEXT: xorl %eax, %eax +; X86-64-NEXT: retq + +entry: + store volatile i32 2, i32* @result, align 4 + ret i32 0 +} + +attributes #0 = { mustprogress nofree norecurse nosync nounwind readnone uwtable willreturn "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { nofree norecurse nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } diff --git a/llvm/utils/TableGen/CodeGenRegisters.h b/llvm/utils/TableGen/CodeGenRegisters.h --- a/llvm/utils/TableGen/CodeGenRegisters.h +++ b/llvm/utils/TableGen/CodeGenRegisters.h @@ -476,6 +476,26 @@ static void computeSubClasses(CodeGenRegBank&); }; + // Register categories are used when we need to determine the category a + // register falls into (GPR, vector, fixed, etc.) without having to know + // specific information about the target architecture. 
+ class CodeGenRegisterCategory { + Record *TheDef; + std::string Name; + std::list<CodeGenRegisterClass *> Classes; + + public: + CodeGenRegisterCategory(CodeGenRegBank &, Record *R); + CodeGenRegisterCategory(const CodeGenRegisterCategory &) = delete; + + // Accessors; getDef() is the RegisterCategory record this was built from. + Record *getDef() const { return TheDef; } + const std::string &getName() const { return Name; } + const std::list<CodeGenRegisterClass *> &getClasses() const { + return Classes; + } + }; + // Register units are used to model interference and register pressure. // Every register is assigned one or more register units such that two // registers overlap if and only if they have a register unit in common. @@ -559,6 +579,13 @@ typedef std::map<CodeGenRegisterClass::Key, CodeGenRegisterClass*> RCKeyMap; RCKeyMap Key2RC; + // Register categories. + std::list<CodeGenRegisterCategory> RegCategories; + DenseMap<const Record *, CodeGenRegisterCategory *> Def2RCat; + using RCatKeyMap = + std::map<CodeGenRegisterClass::Key, CodeGenRegisterCategory *>; + RCatKeyMap Key2RCat; + // Remember each unique set of register units. Initially, this contains a // unique set for each register class. Simliar sets are coalesced with // pruneUnitSets and new supersets are inferred during computeRegUnitSets. @@ -719,6 +746,14 @@ return RegClasses; } + std::list<CodeGenRegisterCategory> &getRegCategories() { + return RegCategories; + } + + const std::list<CodeGenRegisterCategory> &getRegCategories() const { + return RegCategories; + } + // Find a register class from its def. 
CodeGenRegisterClass *getRegClass(const Record *) const; diff --git a/llvm/utils/TableGen/CodeGenRegisters.cpp b/llvm/utils/TableGen/CodeGenRegisters.cpp --- a/llvm/utils/TableGen/CodeGenRegisters.cpp +++ b/llvm/utils/TableGen/CodeGenRegisters.cpp @@ -1105,6 +1105,19 @@ std::back_inserter(RegUnits)); } +//===----------------------------------------------------------------------===// +// CodeGenRegisterCategory +//===----------------------------------------------------------------------===// + +/// Build the category defined by \p R: it is named after the record and holds +/// the register class that \p RegBank returns for each def in "Classes". +CodeGenRegisterCategory::CodeGenRegisterCategory(CodeGenRegBank &RegBank, + Record *R) + : TheDef(R), Name(std::string(R->getName())) { + for (Record *RegClass : R->getValueAsListOfDefs("Classes")) + Classes.push_back(RegBank.getRegClass(RegClass)); +} + //===----------------------------------------------------------------------===// // CodeGenRegBank //===----------------------------------------------------------------------===// @@ -1222,6 +1235,12 @@ for (auto &RC : RegClasses) RC.EnumValue = i++; CodeGenRegisterClass::computeSubClasses(*this); + + // Read in the register category definitions. + std::vector<Record *> RCats = + Records.getAllDerivedDefinitions("RegisterCategory"); + for (Record *R : RCats) + RegCategories.emplace_back(*this, R); } // Create a synthetic CodeGenSubRegIndex without a corresponding Record. 
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp --- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp +++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp @@ -1176,6 +1176,10 @@ << "unsigned RegUnit) const override;\n" << " ArrayRef getRegMaskNames() const override;\n" << " ArrayRef getRegMasks() const override;\n" + << " bool isGeneralPurposeRegister(const MachineFunction &, " + << "MCRegister) const override;\n" + << " bool isFixedRegister(const MachineFunction &, " + << "MCRegister) const override;\n" << " /// Devirtualized TargetFrameLowering.\n" << " static const " << TargetName << "FrameLowering *getFrameLowering(\n" << " const MachineFunction &MF);\n" @@ -1620,6 +1624,36 @@ } OS << "}\n\n"; + const std::list &RegCategories = + RegBank.getRegCategories(); + OS << "bool " << ClassName << "::\n" + << "isGeneralPurposeRegister(const MachineFunction &MF, " + << "MCRegister PhysReg) const {\n" + << " return\n"; + for (const CodeGenRegisterCategory &Category : RegCategories) + if (Category.getName() == "GeneralPurposeRegisters") { + for (const CodeGenRegisterClass *RC : Category.getClasses()) + OS << " " << RC->getQualifiedName() + << "RegClass.contains(PhysReg) ||\n"; + break; + } + OS << " false;\n"; + OS << "}\n\n"; + + OS << "bool " << ClassName << "::\n" + << "isFixedRegister(const MachineFunction &MF, " + << "MCRegister PhysReg) const {\n" + << " return\n"; + for (const CodeGenRegisterCategory &Category : RegCategories) + if (Category.getName() == "FixedRegisters") { + for (const CodeGenRegisterClass *RC : Category.getClasses()) + OS << " " << RC->getQualifiedName() + << "RegClass.contains(PhysReg) ||\n"; + break; + } + OS << " false;\n"; + OS << "}\n\n"; + OS << "ArrayRef " << ClassName << "::getRegMaskNames() const {\n"; if (!CSRSets.empty()) {