diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -116,6 +116,10 @@ # define LANGBUILTIN(ID, TYPE, ATTRS, BUILTIN_LANG) BUILTIN(ID, TYPE, ATTRS) #endif +// Builtins for checking CPU features based on the GCC builtins. +BUILTIN(__builtin_cpu_supports, "bcC*", "nc") +BUILTIN(__builtin_cpu_is, "bcC*", "nc") + // Standard libc/libm functions: BUILTIN(__builtin_atan2 , "ddd" , "Fne") BUILTIN(__builtin_atan2f, "fff" , "Fne") diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -26,12 +26,9 @@ # define TARGET_HEADER_BUILTIN(ID, TYPE, ATTRS, HEADER, LANG, FEATURE) BUILTIN(ID, TYPE, ATTRS) #endif -// Miscellaneous builtin for checking x86 cpu features. // TODO: Make this somewhat generic so that other backends // can use it? BUILTIN(__builtin_cpu_init, "v", "n") -BUILTIN(__builtin_cpu_supports, "bcC*", "nc") -BUILTIN(__builtin_cpu_is, "bcC*", "nc") // Undefined Values // diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1398,6 +1398,11 @@ return getTriple().isOSBinFormatELF() && !getTriple().isOSFuchsia(); } + // Identify whether this target supports __builtin_cpu_supports and + // __builtin_cpu_is. + virtual bool supportsCpuSupports() const { return false; } + virtual bool supportsCpuIs() const { return false; } + // Validate the contents of the __builtin_cpu_supports(const char*) // argument. 
virtual bool validateCpuSupports(StringRef Name) const { return false; } diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -356,6 +356,12 @@ bool isSPRegName(StringRef RegName) const override { return RegName.equals("r1") || RegName.equals("x1"); } + // We support __builtin_cpu_supports/__builtin_cpu_is on targets that + // have GLIBC since it is GLIBC that provides the HWCAP[2] in the auxv. + bool supportsCpuSupports() const override { return getTriple().isOSGlibc(); } + bool supportsCpuIs() const override { return getTriple().isOSGlibc(); } + bool validateCpuSupports(StringRef Feature) const override; + bool validateCpuIs(StringRef Name) const override; }; class LLVM_LIBRARY_VISIBILITY PPC32TargetInfo : public PPCTargetInfo { diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -858,3 +858,17 @@ return llvm::ArrayRef(BuiltinInfo, clang::PPC::LastTSBuiltin - Builtin::FirstTSBuiltin); } + +bool PPCTargetInfo::validateCpuSupports(StringRef FeatureStr) const { +#define PPC_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN) .Case(NAME, true) + return llvm::StringSwitch(FeatureStr) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default(false); +} + +bool PPCTargetInfo::validateCpuIs(StringRef CPUName) const { +#define PPC_CPU(NAME, NUM) .Case(NAME, true) + return llvm::StringSwitch(CPUName) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default(false); +} diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -206,6 +206,10 @@ return RegName.equals("esp") || RegName.equals("rsp"); } + bool supportsCpuSupports() const override { return true; } + + bool supportsCpuIs() const override { return true; } + bool validateCpuSupports(StringRef FeatureStr) const override; bool 
validateCpuIs(StringRef FeatureStr) const override; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -33,6 +33,7 @@ #include "llvm/ADT/APInt.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/InlineAsm.h" @@ -13190,9 +13191,9 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E) { - if (BuiltinID == X86::BI__builtin_cpu_is) + if (BuiltinID == Builtin::BI__builtin_cpu_is) return EmitX86CpuIs(E); - if (BuiltinID == X86::BI__builtin_cpu_supports) + if (BuiltinID == Builtin::BI__builtin_cpu_supports) return EmitX86CpuSupports(E); if (BuiltinID == X86::BI__builtin_cpu_init) return EmitX86CpuInit(); @@ -15692,6 +15693,38 @@ // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we // call __builtin_readcyclecounter. 
+ case Builtin::BI__builtin_cpu_is: { + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast(CPUExpr)->getString(); + unsigned NumCPUID = StringSwitch(CPUStr) +#define PPC_CPU(Name, NumericID) .Case(Name, NumericID) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default(-1U); + Value *Op0 = + llvm::ConstantInt::get(Int32Ty, (unsigned)TargetLibraryInfo::FA_CPUID); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::fixed_addr_ld); + Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_is"); + return Builder.CreateICmpEQ(TheCall, + llvm::ConstantInt::get(Int32Ty, NumCPUID)); + } + case Builtin::BI__builtin_cpu_supports: { + unsigned FeatureWord; + unsigned BitMask; + const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts(); + StringRef CPUStr = cast(CPUExpr)->getString(); + std::tie(FeatureWord, BitMask) = + StringSwitch>(CPUStr) +#define PPC_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \ + .Case(Name, {(unsigned)TargetLibraryInfo::FA_WORD, Bitmask}) +#include "llvm/TargetParser/PPCTargetParser.def" + .Default({0, 0}); + Value *Op0 = llvm::ConstantInt::get(Int32Ty, FeatureWord); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::fixed_addr_ld); + Value *TheCall = Builder.CreateCall(F, {Op0}, "cpu_supports"); + Value *Mask = + Builder.CreateAnd(TheCall, llvm::ConstantInt::get(Int32Ty, BitMask)); + return Builder.CreateICmpNE(Mask, llvm::Constant::getNullValue(Int32Ty)); + } case PPC::BI__builtin_ppc_get_timebase: return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter)); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2061,6 +2061,66 @@ return false; } +/// SemaBuiltinCpuSupports - Handle __builtin_cpu_supports(char *). +/// This checks that the target supports __builtin_cpu_supports and +/// that the string argument is constant and valid. 
+static bool SemaBuiltinCpuSupports(Sema &S, const TargetInfo &TI, + const TargetInfo *AuxTI, CallExpr *TheCall) { + Expr *Arg = TheCall->getArg(0); + + const TargetInfo *TheTI = nullptr; + if (TI.supportsCpuSupports()) + TheTI = &TI; + else if (AuxTI && AuxTI->supportsCpuSupports()) + TheTI = AuxTI; + else + return S.Diag(TheCall->getBeginLoc(), diag::warn_builtin_unknown) + << "__builtin_cpu_supports"; + + // Check if the argument is a string literal. + if (!isa(Arg->IgnoreParenImpCasts())) + return S.Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal) + << Arg->getSourceRange(); + + // Check the contents of the string. + StringRef Feature = + cast(Arg->IgnoreParenImpCasts())->getString(); + if (!TheTI->validateCpuSupports(Feature)) + return S.Diag(TheCall->getBeginLoc(), diag::err_invalid_cpu_supports) + << Arg->getSourceRange(); + return false; +} + +/// SemaBuiltinCpuIs - Handle __builtin_cpu_is(char *). +/// This checks that the target supports __builtin_cpu_is and +/// that the string argument is constant and valid. +static bool SemaBuiltinCpuIs(Sema &S, const TargetInfo &TI, + const TargetInfo *AuxTI, CallExpr *TheCall) { + Expr *Arg = TheCall->getArg(0); + + const TargetInfo *TheTI = nullptr; + if (TI.supportsCpuIs()) + TheTI = &TI; + else if (AuxTI && AuxTI->supportsCpuIs()) + TheTI = AuxTI; + else + return S.Diag(TheCall->getBeginLoc(), diag::warn_builtin_unknown) + << "__builtin_cpu_is"; + + // Check if the argument is a string literal. + if (!isa(Arg->IgnoreParenImpCasts())) + return S.Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal) + << Arg->getSourceRange(); + + // Check the contents of the string. 
+ StringRef Feature = + cast(Arg->IgnoreParenImpCasts())->getString(); + if (!TheTI->validateCpuIs(Feature)) + return S.Diag(TheCall->getBeginLoc(), diag::err_invalid_cpu_is) + << Arg->getSourceRange(); + return false; +} + ExprResult Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, CallExpr *TheCall) { @@ -2088,6 +2148,16 @@ } switch (BuiltinID) { + case Builtin::BI__builtin_cpu_supports: + if (SemaBuiltinCpuSupports(*this, Context.getTargetInfo(), + Context.getAuxTargetInfo(), TheCall)) + return ExprError(); + break; + case Builtin::BI__builtin_cpu_is: + if (SemaBuiltinCpuIs(*this, Context.getTargetInfo(), + Context.getAuxTargetInfo(), TheCall)) + return ExprError(); + break; case Builtin::BI__builtin___CFStringMakeConstantString: // CFStringMakeConstantString is currently not implemented for GOFF (i.e., // on z/OS) and for XCOFF (i.e., on AIX). Emit unsupported @@ -4838,47 +4908,6 @@ return false; } -/// SemaBuiltinCpuSupports - Handle __builtin_cpu_supports(char *). -/// This checks that the target supports __builtin_cpu_supports and -/// that the string argument is constant and valid. -static bool SemaBuiltinCpuSupports(Sema &S, const TargetInfo &TI, - CallExpr *TheCall) { - Expr *Arg = TheCall->getArg(0); - - // Check if the argument is a string literal. - if (!isa(Arg->IgnoreParenImpCasts())) - return S.Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal) - << Arg->getSourceRange(); - - // Check the contents of the string. - StringRef Feature = - cast(Arg->IgnoreParenImpCasts())->getString(); - if (!TI.validateCpuSupports(Feature)) - return S.Diag(TheCall->getBeginLoc(), diag::err_invalid_cpu_supports) - << Arg->getSourceRange(); - return false; -} - -/// SemaBuiltinCpuIs - Handle __builtin_cpu_is(char *). -/// This checks that the target supports __builtin_cpu_is and -/// that the string argument is constant and valid. 
-static bool SemaBuiltinCpuIs(Sema &S, const TargetInfo &TI, CallExpr *TheCall) { - Expr *Arg = TheCall->getArg(0); - - // Check if the argument is a string literal. - if (!isa(Arg->IgnoreParenImpCasts())) - return S.Diag(TheCall->getBeginLoc(), diag::err_expr_not_string_literal) - << Arg->getSourceRange(); - - // Check the contents of the string. - StringRef Feature = - cast(Arg->IgnoreParenImpCasts())->getString(); - if (!TI.validateCpuIs(Feature)) - return S.Diag(TheCall->getBeginLoc(), diag::err_invalid_cpu_is) - << Arg->getSourceRange(); - return false; -} - // Check if the rounding mode is legal. bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) { // Indicates if this instruction has rounding control or just SAE. @@ -5353,12 +5382,6 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall) { - if (BuiltinID == X86::BI__builtin_cpu_supports) - return SemaBuiltinCpuSupports(*this, TI, TheCall); - - if (BuiltinID == X86::BI__builtin_cpu_is) - return SemaBuiltinCpuIs(*this, TI, TheCall); - // Check for 32-bit only builtins on a 64-bit target. const llvm::Triple &TT = TI.getTriple(); if (TT.getArch() != llvm::Triple::x86 && isX86_32Builtin(BuiltinID)) diff --git a/clang/test/CodeGen/builtin-cpu-supports.c b/clang/test/CodeGen/builtin-cpu-supports.c --- a/clang/test/CodeGen/builtin-cpu-supports.c +++ b/clang/test/CodeGen/builtin-cpu-supports.c @@ -1,32 +1,55 @@ -// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm < %s| FileCheck %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm < %s | FileCheck %s \ +// RUN: --check-prefix=CHECK-X86 +// RUN: %clang_cc1 -triple ppc64le-linux-gnu -emit-llvm < %s | FileCheck %s \ +// RUN: --check-prefix=CHECK-PPC +#ifndef __PPC__ // Test that we have the structure definition, the gep offsets, the name of the // global, the bit grab, and the icmp correct. 
extern void a(const char *); -// CHECK: @__cpu_model = external dso_local global { i32, i32, i32, [1 x i32] } -// CHECK: @__cpu_features2 = external dso_local global i32 +// CHECK-X86: @__cpu_model = external dso_local global { i32, i32, i32, [1 x i32] } +// CHECK-X86: @__cpu_features2 = external dso_local global i32 int main(void) { __builtin_cpu_init(); - // CHECK: call void @__cpu_indicator_init + // CHECK-X86: call void @__cpu_indicator_init if (__builtin_cpu_supports("sse4.2")) a("sse4.2"); - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0) - // CHECK: [[AND:%[^ ]+]] = and i32 [[LOAD]], 256 - // CHECK: = icmp eq i32 [[AND]], 256 +// CHECK-X86: [[LOAD:%[^ ]+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0) +// CHECK-X86: [[AND:%[^ ]+]] = and i32 [[LOAD]], 256 +// CHECK-X86: = icmp eq i32 [[AND]], 256 if (__builtin_cpu_supports("gfni")) a("gfni"); - // CHECK: [[LOAD:%[^ ]+]] = load i32, ptr @__cpu_features2 - // CHECK: [[AND:%[^ ]+]] = and i32 [[LOAD]], 1 - // CHECK: = icmp eq i32 [[AND]], 1 +// CHECK-X86: [[LOAD:%[^ ]+]] = load i32, ptr @__cpu_features2 +// CHECK-X86: [[AND:%[^ ]+]] = and i32 [[LOAD]], 1 +// CHECK-X86: = icmp eq i32 [[AND]], 1 return 0; } -// CHECK: declare dso_local void @__cpu_indicator_init() +// CHECK-X86: declare dso_local void @__cpu_indicator_init() +#else +int test(int a) { +// CHECK-PPC: [[CPUSUP:%[^ ]+]] = call i32 @llvm.fixed.addr.ld(i32 1) +// CHECK-PPC: [[AND:%[^ ]+]] = and i32 [[CPUSUP]], 8388608 +// CHECK-PPC: icmp ne i32 [[AND]], 0 +// CHECK-PPC: [[CPUSUP2:%[^ ]+]] = call i32 @llvm.fixed.addr.ld(i32 0) +// CHECK-PPC: [[AND2:%[^ ]+]] = and i32 [[CPUSUP2]], 67108864 +// CHECK-PPC: icmp ne i32 [[AND2]], 0 +// CHECK-PPC: [[CPUID:%[^ ]+]] = call i32 @llvm.fixed.addr.ld(i32 2) +// CHECK-PPC: icmp eq i32 [[CPUID]], 39 + if (__builtin_cpu_supports("arch_3_00")) // HWCAP2 + return a; + else 
if (__builtin_cpu_supports("mmu")) // HWCAP + return a - 5; + else if (__builtin_cpu_is("power7")) // CPUID + return a + a; + return a + 5; +} +#endif diff --git a/clang/test/Sema/builtin-cpu-supports.c b/clang/test/Sema/builtin-cpu-supports.c --- a/clang/test/Sema/builtin-cpu-supports.c +++ b/clang/test/Sema/builtin-cpu-supports.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -fsyntax-only -triple x86_64-pc-linux-gnu -verify %s -// RUN: %clang_cc1 -fsyntax-only -triple powerpc64le-linux-gnu -verify %s +// RUN: %clang_cc1 -fsyntax-only -triple aarch64-linux-gnu -verify %s extern void a(const char *); @@ -21,11 +21,11 @@ (void)__builtin_cpu_is("x86-64-v3"); // expected-error {{invalid cpu name for builtin}} (void)__builtin_cpu_is("x86-64-v4"); // expected-error {{invalid cpu name for builtin}} #else - if (__builtin_cpu_supports("vsx")) // expected-error {{use of unknown builtin}} - a("vsx"); + if (__builtin_cpu_supports("aes")) // expected-error {{use of unknown builtin}} + a("aes"); - if (__builtin_cpu_is("pwr9")) // expected-error {{use of unknown builtin}} - a("pwr9"); + if (__builtin_cpu_is("cortex-x3")) // expected-error {{use of unknown builtin}} + a("cortex-x3"); #endif return 0; diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h @@ -246,6 +246,16 @@ BitVector OverrideAsUnavailable; public: + /// Possible values provided by the system library at a fixed address. These + /// are values that are set up by the OS, system library or dynamic loader + /// that usually provide information about the CPU/System/Environment within + /// which the program is being executed. + enum FixedAddrType { + FA_HWCAP, // The HWCAP word provided by libc. + FA_HWCAP2, // The HWCAP2 word provided by libc. + FA_CPUID // The CPUID word (or string) provided by libc. 
+ }; + explicit TargetLibraryInfo(const TargetLibraryInfoImpl &Impl, std::optional F = std::nullopt) : Impl(&Impl), OverrideAsUnavailable(NumLibFuncs) { diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -27,6 +27,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/CodeGen/ComplexDeinterleavingPass.h" #include "llvm/CodeGen/DAGCombine.h" #include "llvm/CodeGen/ISDOpcodes.h" @@ -85,7 +86,6 @@ class MCExpr; class Module; class ProfileSummaryInfo; -class TargetLibraryInfo; class TargetMachine; class TargetRegisterClass; class TargetRegisterInfo; @@ -5238,6 +5238,12 @@ return false; } + /// If this function returns true, SelectionDAGBuilder emits a + /// LOAD_FIXED_ADDR for the particular value. + virtual bool useLoadFixedAddr(TargetLibraryInfo::FixedAddrType Type) const { + return false; + } + virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, const SDLoc &DL) const { llvm_unreachable("not implemented for this target"); diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -900,6 +900,12 @@ def int_stackprotector : DefaultAttrsIntrinsic<[], [llvm_ptr_ty, llvm_ptrptr_ty], []>; def int_stackguard : DefaultAttrsIntrinsic<[llvm_ptr_ty], [], []>; +// Load of a value provided by the system library at a fixed address. Used for +// accessing things like HWCAP word provided by GLIBC. +def int_fixed_addr_ld + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], + [IntrInaccessibleMemOnly, ImmArg>]>; + // A cover for instrumentation based profiling. 
def int_instrprof_cover : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty, llvm_i32_ty, llvm_i32_ty]>; diff --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def --- a/llvm/include/llvm/Support/TargetOpcodes.def +++ b/llvm/include/llvm/Support/TargetOpcodes.def @@ -145,6 +145,11 @@ /// additionally expand this pseudo after register allocation. HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD) +/// This pseudo-instruction loads a value provided by libc at a fixed address. +/// For example, GLIBC provides HWCAP, HWCAP2 and CPUID at a fixed address +/// on some targets (for example PPC Linux). +HANDLE_TARGET_OPCODE(LOAD_FIXED_ADDR) + /// These are used to support call sites that must have the stack adjusted /// before the call (e.g. to initialize an argument passed by value). /// See llvm.call.preallocated.{setup,arg} in the LangRef for more details. diff --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td --- a/llvm/include/llvm/Target/Target.td +++ b/llvm/include/llvm/Target/Target.td @@ -1295,6 +1295,14 @@ let hasSideEffects = false; bit isPseudo = true; } +def LOAD_FIXED_ADDR : StandardPseudoInstruction { + let OutOperandList = (outs unknown:$dst); + let InOperandList = (ins i32imm:$a); + let mayLoad = true; + bit isReMaterializable = true; + let hasSideEffects = false; + bit isPseudo = true; +} def PREALLOCATED_SETUP : StandardPseudoInstruction { let OutOperandList = (outs); let InOperandList = (ins i32imm:$a); diff --git a/llvm/include/llvm/TargetParser/PPCTargetParser.def b/llvm/include/llvm/TargetParser/PPCTargetParser.def new file mode 100644 --- /dev/null +++ b/llvm/include/llvm/TargetParser/PPCTargetParser.def @@ -0,0 +1,71 @@ +#ifndef PPC_FEATURE +#define PPC_FEATURE(NAME, DESC, ENUMNAME, ENUMVAL, HWCAPN) +#endif +#ifndef PPC_CPU +#define PPC_CPU(NAME, NUM) +#endif + +// PPC_FEATURE(Name, Description, EnumName, BitMask, FA_WORD) +PPC_FEATURE("4xxmac","4xx CPU has a Multiply 
Accumulator",PPCF_4XXMAC,0x02000000,FA_HWCAP) +PPC_FEATURE("altivec","CPU has a SIMD/Vector Unit",PPCF_ALTIVEC,0x10000000,FA_HWCAP) +PPC_FEATURE("arch_2_05","CPU supports ISA 205 (eg, POWER6)",PPCF_ARCH205,0x00001000,FA_HWCAP) +PPC_FEATURE("arch_2_06","CPU supports ISA 206 (eg, POWER7)",PPCF_ARCH206,0x00000100,FA_HWCAP) +PPC_FEATURE("arch_2_07","CPU supports ISA 207 (eg, POWER8)",PPCF_ARCH207,0x80000000,FA_HWCAP2) +PPC_FEATURE("arch_3_00","CPU supports ISA 30 (eg, POWER9)",PPCF_ARCH30,0x00800000,FA_HWCAP2) +PPC_FEATURE("arch_3_1","CPU supports ISA 31 (eg, POWER10)",PPCF_ARCH31,0x00040000,FA_HWCAP2) +PPC_FEATURE("archpmu","CPU supports the set of compatible performance monitoring events",PPCF_ARCHPMU,0x00000040,FA_HWCAP) +PPC_FEATURE("booke","CPU supports the Embedded ISA category",PPCF_BOOKE,0x00008000,FA_HWCAP) +PPC_FEATURE("cellbe","CPU has a CELL broadband engine",PPCF_CELLBE,0x00010000,FA_HWCAP) +PPC_FEATURE("darn","CPU supports the darn (deliver a random number) instruction",PPCF_DARN,0x00200000,FA_HWCAP2) +PPC_FEATURE("dfp","CPU has a decimal floating point unit",PPCF_DFP,0x00000400,FA_HWCAP) +PPC_FEATURE("dscr","CPU supports the data stream control register",PPCF_DSCR,0x20000000,FA_HWCAP2) +PPC_FEATURE("ebb","CPU supports event base branching",PPCF_EBB,0x10000000,FA_HWCAP2) +PPC_FEATURE("efpdouble","CPU has a SPE double precision floating point unit",PPCF_EFPDOUBLE,0x00200000,FA_HWCAP) +PPC_FEATURE("efpsingle","CPU has a SPE single precision floating point unit",PPCF_EFPSINGLE,0x00400000,FA_HWCAP) +PPC_FEATURE("fpu","CPU has a floating point unit",PPCF_FPU,0x08000000,FA_HWCAP) +PPC_FEATURE("htm","CPU has hardware transaction memory instructions",PPCF_HTM,0x40000000,FA_HWCAP2) +PPC_FEATURE("htm-nosc","Kernel aborts hardware transactions when a syscall is made",PPCF_HTM_NOSC,0x01000000,FA_HWCAP2) +PPC_FEATURE("htm-no-suspend","CPU supports hardware transaction memory but does not support the tsuspend instruction.",PPCF_HTM_NO_SUSPEND,0x00080000,FA_HWCAP2) 
+PPC_FEATURE("ic_snoop","CPU supports icache snooping capabilities",PPCF_IC_SNOOP,0x00002000,FA_HWCAP) +PPC_FEATURE("ieee128","CPU supports 128-bit IEEE binary floating point instructions",PPCF_IEEE128,0x00400000,FA_HWCAP2) +PPC_FEATURE("isel","CPU supports the integer select instruction",PPCF_ISEL,0x08000000,FA_HWCAP2) +PPC_FEATURE("mma","CPU supports the matrix-multiply assist instructions",PPCF_MMA,0x00020000,FA_HWCAP2) +PPC_FEATURE("mmu","CPU has a memory management unit",PPCF_MMU,0x04000000,FA_HWCAP) +PPC_FEATURE("notb","CPU does not have a timebase (eg, 601 and 403gx)",PPCF_NOTB,0x00100000,FA_HWCAP) +PPC_FEATURE("pa6t","CPU supports the PA Semi 6T CORE ISA",PPCF_PA6T,0x00000800,FA_HWCAP) +PPC_FEATURE("power4","CPU supports ISA 200 (eg, POWER4)",PPCF_POWER4,0x00080000,FA_HWCAP) +PPC_FEATURE("power5","CPU supports ISA 202 (eg, POWER5)",PPCF_POWER5,0x00040000,FA_HWCAP) +PPC_FEATURE("power5+","CPU supports ISA 203 (eg, POWER5+)",PPCF_POWER5P,0x00020000,FA_HWCAP) +PPC_FEATURE("power6x","CPU supports ISA 205 (eg, POWER6) extended opcodes mffgpr and mftgpr.",PPCF_POWER6X,0x00000200,FA_HWCAP) +PPC_FEATURE("ppc32","CPU supports 32-bit mode execution",PPCF_PPC32,0x80000000,FA_HWCAP) +PPC_FEATURE("ppc601","CPU supports the old POWER ISA (eg, 601)",PPCF_PPC601,0x20000000,FA_HWCAP) +PPC_FEATURE("ppc64","CPU supports 64-bit mode execution",PPCF_PPC64,0x40000000,FA_HWCAP) +PPC_FEATURE("ppcle","CPU supports a little-endian mode that uses address swizzling",PPCF_PPCLE,0x00000001,FA_HWCAP) +PPC_FEATURE("scv","Kernel supports system call vectored",PPCF_SCV,0x00100000,FA_HWCAP2) +PPC_FEATURE("smt","CPU support simultaneous multi-threading",PPCF_SMT,0x00004000,FA_HWCAP) +PPC_FEATURE("spe","CPU has a signal processing extension unit",PPCF_SPE,0x00800000,FA_HWCAP) +PPC_FEATURE("tar","CPU supports the target address register",PPCF_TAR,0x04000000,FA_HWCAP2) +PPC_FEATURE("true_le","CPU supports true little-endian mode",PPCF_TRUE_LE,0x00000002,FA_HWCAP) +PPC_FEATURE("ucache","CPU has 
unified I/D cache",PPCF_UCACHE,0x01000000,FA_HWCAP) +PPC_FEATURE("vcrypto","CPU supports the vector cryptography instructions",PPCF_VCRYPTO,0x02000000,FA_HWCAP2) +PPC_FEATURE("vsx","CPU supports the vector-scalar extension",PPCF_VSX,0x00000080,FA_HWCAP) + +// PPC_CPU(Name, NumericID) +PPC_CPU("power4",32) +PPC_CPU("ppc970",33) +PPC_CPU("power5",34) +PPC_CPU("power5+",35) +PPC_CPU("power6",36) +PPC_CPU("ppc-cell-be",37) +PPC_CPU("power6x",38) +PPC_CPU("power7",39) +PPC_CPU("ppca2",40) +PPC_CPU("ppc405",41) +PPC_CPU("ppc440",42) +PPC_CPU("ppc464",43) +PPC_CPU("ppc476",44) +PPC_CPU("power8",45) +PPC_CPU("power9",46) +PPC_CPU("power10",47) +#undef PPC_FEATURE +#undef PPC_CPU diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2736,6 +2736,17 @@ return SDValue(Node, 0); } +/// Create a LOAD_FIXED_ADDR machine node whose operands are a target constant +/// identifying which fixed-address value to load (\p Type) and \p Chain. +static SDValue getLoadFixedAddr(SelectionDAG &DAG, const SDLoc &DL, + SDValue &Chain, + TargetLibraryInfo::FixedAddrType Type) { + MachineSDNode *Node = + DAG.getMachineNode(TargetOpcode::LOAD_FIXED_ADDR, DL, MVT::i32, + DAG.getTargetConstant(Type, DL, MVT::i32), Chain); + return SDValue(Node, 0); +} + /// Codegen a new tail for a stack protector check ParentMBB which has had its /// tail spliced into a stack protector check success bb. 
/// @@ -6773,6 +6784,15 @@ DAG.setRoot(Res); return; } + case Intrinsic::fixed_addr_ld: { + SDValue Chain = getRoot(); + uint64_t ConstArg = cast(I.getArgOperand(0))->getZExtValue(); + Res = getLoadFixedAddr(DAG, sdl, Chain, + (TargetLibraryInfo::FixedAddrType)ConstArg); + DAG.setRoot(Chain); + setValue(&I, Res); + return; + } case Intrinsic::objectsize: llvm_unreachable("llvm.objectsize.* should have been lowered already"); diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -1782,6 +1782,10 @@ PPCTargetStreamer *TS = static_cast(OutStreamer->getTargetStreamer()); + if (static_cast(TM).hasGlibcHWCAPAccess()) + OutStreamer->emitSymbolValue( + GetExternalSymbolSymbol("__parse_hwcap_and_convert_at_platform"), + MAI->getCodePointerSize()); emitGNUAttributes(M); if (!TOC.empty()) { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -1122,6 +1122,7 @@ /// Override to support customized stack guard loading. bool useLoadStackGuardNode() const override; + bool useLoadFixedAddr(TargetLibraryInfo::FixedAddrType Type) const override; void insertSSPDeclarations(Module &M) const override; Value *getSDagStackGuard(const Module &M) const override; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1341,6 +1341,8 @@ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? 
Legal : Custom); + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Legal); + setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Legal); if (!isPPC64) { setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand); setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand); @@ -17370,6 +17372,22 @@ return true; } +// Enable LOAD_FIXED_ADDR for the values that are available from +// libc. +bool PPCTargetLowering::useLoadFixedAddr( + TargetLibraryInfo::FixedAddrType Type) const { + if (!Subtarget.isTargetGlibc()) + return false; + + switch (Type) { + case TargetLibraryInfo::FA_HWCAP: + case TargetLibraryInfo::FA_HWCAP2: + case TargetLibraryInfo::FA_CPUID: + return true; + } + return false; +} + // Override to disable global variable loading on Linux and insert AIX canary // word declaration. void PPCTargetLowering::insertSSPDeclarations(Module &M) const { diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1102,6 +1102,7 @@ case PPC::ADDIStocHA8: case PPC::ADDItocL: case PPC::LOAD_STACK_GUARD: + case PPC::LOAD_FIXED_ADDR: case PPC::XXLXORz: case PPC::XXLXORspz: case PPC::XXLXORdpz: @@ -3127,6 +3128,32 @@ .addReg(Reg); return true; } + case TargetOpcode::LOAD_FIXED_ADDR: { + assert(Subtarget.isTargetLinux() && + "Only Linux target is expected to contain LOAD_FIXED_ADDR"); + int64_t Offset = 0; + const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2; + MI.setDesc(get(PPC::LWZ)); + uint64_t FAType = MI.getOperand(1).getImm(); + // The HWCAP and HWCAP2 word offsets are reversed on big endian Linux. + if ((FAType == TargetLibraryInfo::FA_HWCAP && Subtarget.isLittleEndian()) || + (FAType == TargetLibraryInfo::FA_HWCAP2 && !Subtarget.isLittleEndian())) + Offset = Subtarget.isPPC64() ? 
-0x7064 : -0x703C; + else if ((FAType == TargetLibraryInfo::FA_HWCAP2 && + Subtarget.isLittleEndian()) || + (FAType == TargetLibraryInfo::FA_HWCAP && + !Subtarget.isLittleEndian())) + Offset = Subtarget.isPPC64() ? -0x7068 : -0x7040; + else if (FAType == TargetLibraryInfo::FA_CPUID) + Offset = Subtarget.isPPC64() ? -0x705C : -0x7034; + assert(Offset && "Do not know the offset for this fixed addr load"); + MI.removeOperand(1); + Subtarget.getTargetMachine().setGlibcHWCAPAccess(); + MachineInstrBuilder(*MI.getParent()->getParent(), MI) + .addImm(Offset) + .addReg(Reg); + return true; + } case PPC::DFLOADf32: case PPC::DFLOADf64: case PPC::DFSTOREf32: diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h --- a/llvm/lib/Target/PowerPC/PPCSubtarget.h +++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h @@ -210,6 +210,7 @@ bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); } bool isTargetLinux() const { return TargetTriple.isOSLinux(); } + bool isTargetGlibc() const { return TargetTriple.isOSGlibc(); } bool isAIXABI() const { return TargetTriple.isOSAIX(); } bool isSVR4ABI() const { return !isAIXABI(); } diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.h b/llvm/lib/Target/PowerPC/PPCTargetMachine.h --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.h +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.h @@ -32,6 +32,7 @@ std::unique_ptr TLOF; PPCABI TargetABI; Endian Endianness = Endian::NOT_DETECTED; + mutable bool HasGlibcHWCAPAccess = false; mutable StringMap> SubtargetMap; @@ -64,6 +65,8 @@ const TargetSubtargetInfo *STI) const override; bool isELFv2ABI() const { return TargetABI == PPC_ABI_ELFv2; } + bool hasGlibcHWCAPAccess() const { return HasGlibcHWCAPAccess; } + void setGlibcHWCAPAccess(bool Val = true) const { HasGlibcHWCAPAccess = Val; } bool isPPC64() const { const Triple &TT = getTargetTriple(); return (TT.getArch() == Triple::ppc64 || 
TT.getArch() == Triple::ppc64le); diff --git a/llvm/test/CodeGen/PowerPC/cpu-supports.ll b/llvm/test/CodeGen/PowerPC/cpu-supports.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/cpu-supports.ll @@ -0,0 +1,111 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64-linux-gnu < %s | FileCheck %s \ +; RUN: -check-prefix=BE64 +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc-linux-gnu < %s | FileCheck %s \ +; RUN: -check-prefix=BE32 +; RUN: llc -mcpu=pwr9 -ppc-asm-full-reg-names \ +; RUN: -mtriple=powerpc64le-linux-gnu < %s | FileCheck %s \ +; RUN: -check-prefix=LE +define dso_local signext i32 @test(i32 noundef signext %a) local_unnamed_addr #0 { +; BE64-LABEL: test: +; BE64: # %bb.0: # %entry +; BE64-NEXT: lwz r4, -28772(r13) +; BE64-NEXT: andis. r4, r4, 128 +; BE64-NEXT: bne cr0, .LBB0_3 +; BE64-NEXT: # %bb.1: # %if.else +; BE64-NEXT: lwz r4, -28776(r13) +; BE64-NEXT: andis. r4, r4, 1024 +; BE64-NEXT: bne cr0, .LBB0_4 +; BE64-NEXT: # %bb.2: # %if.else3 +; BE64-NEXT: lwz r4, -28764(r13) +; BE64-NEXT: cmplwi r4, 39 +; BE64-NEXT: addi r4, r3, 5 +; BE64-NEXT: slwi r3, r3, 1 +; BE64-NEXT: iseleq r3, r3, r4 +; BE64-NEXT: .LBB0_3: # %return +; BE64-NEXT: extsw r3, r3 +; BE64-NEXT: blr +; BE64-NEXT: .LBB0_4: # %if.then2 +; BE64-NEXT: addi r3, r3, -5 +; BE64-NEXT: extsw r3, r3 +; BE64-NEXT: blr +; +; BE32-LABEL: test: +; BE32: # %bb.0: # %entry +; BE32-NEXT: lwz r4, -28732(r2) +; BE32-NEXT: andis. r4, r4, 128 +; BE32-NEXT: bnelr cr0 +; BE32-NEXT: # %bb.1: # %if.else +; BE32-NEXT: lwz r4, -28736(r2) +; BE32-NEXT: andis. 
r4, r4, 1024 +; BE32-NEXT: bne cr0, .LBB0_3 +; BE32-NEXT: # %bb.2: # %if.else3 +; BE32-NEXT: lwz r4, -28724(r2) +; BE32-NEXT: cmplwi r4, 39 +; BE32-NEXT: addi r4, r3, 5 +; BE32-NEXT: slwi r3, r3, 1 +; BE32-NEXT: iseleq r3, r3, r4 +; BE32-NEXT: blr +; BE32-NEXT: .LBB0_3: # %if.then2 +; BE32-NEXT: addi r3, r3, -5 +; BE32-NEXT: blr +; +; LE-LABEL: test: +; LE: # %bb.0: # %entry +; LE-NEXT: lwz r4, -28776(r13) +; LE-NEXT: andis. r4, r4, 128 +; LE-NEXT: bne cr0, .LBB0_3 +; LE-NEXT: # %bb.1: # %if.else +; LE-NEXT: lwz r4, -28772(r13) +; LE-NEXT: andis. r4, r4, 1024 +; LE-NEXT: bne cr0, .LBB0_4 +; LE-NEXT: # %bb.2: # %if.else3 +; LE-NEXT: lwz r4, -28764(r13) +; LE-NEXT: cmplwi r4, 39 +; LE-NEXT: addi r4, r3, 5 +; LE-NEXT: slwi r3, r3, 1 +; LE-NEXT: iseleq r3, r3, r4 +; LE-NEXT: .LBB0_3: # %return +; LE-NEXT: extsw r3, r3 +; LE-NEXT: blr +; LE-NEXT: .LBB0_4: # %if.then2 +; LE-NEXT: addi r3, r3, -5 +; LE-NEXT: extsw r3, r3 +; LE-NEXT: blr +entry: + %cpu_supports = tail call i32 @llvm.fixed.addr.ld(i32 1) + %0 = and i32 %cpu_supports, 8388608 + %.not = icmp eq i32 %0, 0 + br i1 %.not, label %if.else, label %return + +if.else: ; preds = %entry + %cpu_supports1 = tail call i32 @llvm.fixed.addr.ld(i32 0) + %1 = and i32 %cpu_supports1, 67108864 + %.not12 = icmp eq i32 %1, 0 + br i1 %.not12, label %if.else3, label %if.then2 + +if.then2: ; preds = %if.else + %sub = add nsw i32 %a, -5 + br label %return + +if.else3: ; preds = %if.else + %cpu_is = tail call i32 @llvm.fixed.addr.ld(i32 2) + %2 = icmp eq i32 %cpu_is, 39 + br i1 %2, label %if.then4, label %if.end6 + +if.then4: ; preds = %if.else3 + %add = shl nsw i32 %a, 1 + br label %return + +if.end6: ; preds = %if.else3 + %add7 = add nsw i32 %a, 5 + br label %return + +return: ; preds = %entry, %if.end6, %if.then4, %if.then2 + %retval.0 = phi i32 [ %sub, %if.then2 ], [ %add, %if.then4 ], [ %add7, %if.end6 ], [ %a, %entry ] + ret i32 %retval.0 +} + +declare i32 @llvm.fixed.addr.ld(i32 immarg) #1