Index: clang/docs/ReleaseNotes.rst =================================================================== --- clang/docs/ReleaseNotes.rst +++ clang/docs/ReleaseNotes.rst @@ -177,6 +177,17 @@ - Support for ``AVX512-FP16`` instructions has been added. +AArch64 Support in Clang +------------------------ + +- The ``-mtune`` flag is no longer ignored for AArch64. It is now possible to +  tune code generation for a particular CPU with ``-mtune`` without setting any +  architectural features. For example, compiling with +  ``-mcpu=generic -mtune=cortex-a57`` will not enable any Cortex-A57-specific +  architectural features, but will enable certain optimizations specific to +  Cortex-A57 CPUs and enable the use of a more accurate scheduling model. + + Internal API Changes -------------------- Index: clang/lib/Driver/ToolChains/Clang.cpp =================================================================== --- clang/lib/Driver/ToolChains/Clang.cpp +++ clang/lib/Driver/ToolChains/Clang.cpp @@ -1833,6 +1833,21 @@ } AddAAPCSVolatileBitfieldArgs(Args, CmdArgs); + + if (const Arg *A = Args.getLastArg(clang::driver::options::OPT_mtune_EQ)) { + StringRef Name = A->getValue(); + + std::string TuneCPU; + if (Name == "native") + TuneCPU = std::string(llvm::sys::getHostCPUName()); + else + TuneCPU = std::string(Name); + + if (!TuneCPU.empty()) { + CmdArgs.push_back("-tune-cpu"); + CmdArgs.push_back(Args.MakeArgString(TuneCPU)); + } + } } void Clang::AddMIPSTargetArgs(const ArgList &Args, Index: clang/test/Driver/aarch64-mtune.c =================================================================== --- /dev/null +++ clang/test/Driver/aarch64-mtune.c @@ -0,0 +1,42 @@ +// Ensure we support the -mtune flag. + +// There shouldn't be a default -mtune.
+// RUN: %clang -target aarch64-unknown-unknown -c -### %s 2>&1 \ +// RUN: | FileCheck %s -check-prefix=NOTUNE +// NOTUNE-NOT: "-tune-cpu" + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mtune=generic 2>&1 \ +// RUN: | FileCheck %s -check-prefix=GENERIC +// GENERIC: "-tune-cpu" "generic" + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mtune=neoverse-n1 2>&1 \ +// RUN: | FileCheck %s -check-prefix=NEOVERSE-N1 +// NEOVERSE-N1: "-tune-cpu" "neoverse-n1" + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mtune=thunderx2t99 2>&1 \ +// RUN: | FileCheck %s -check-prefix=THUNDERX2T99 +// THUNDERX2T99: "-tune-cpu" "thunderx2t99" + +// Check interaction between march and mtune. + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -march=armv8-a 2>&1 \ +// RUN: | FileCheck %s -check-prefix=MARCHARMV8A +// MARCHARMV8A: "-target-cpu" "generic" +// MARCHARMV8A-NOT: "-tune-cpu" + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -march=armv8-a -mtune=cortex-a75 2>&1 \ +// RUN: | FileCheck %s -check-prefix=MARCHARMV8A-A75 +// MARCHARMV8A-A75: "-target-cpu" "generic" +// MARCHARMV8A-A75: "-tune-cpu" "cortex-a75" + +// Check interaction between mcpu and mtune. + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mcpu=thunderx 2>&1 \ +// RUN: | FileCheck %s -check-prefix=MCPUTHUNDERX +// MCPUTHUNDERX: "-target-cpu" "thunderx" +// MCPUTHUNDERX-NOT: "-tune-cpu" + +// RUN: %clang -target aarch64-unknown-unknown -c -### %s -mcpu=cortex-a75 -mtune=cortex-a57 2>&1 \ +// RUN: | FileCheck %s -check-prefix=MCPUA75-MTUNEA57 +// MCPUA75-MTUNEA57: "-target-cpu" "cortex-a75" +// MCPUA75-MTUNEA57: "-tune-cpu" "cortex-a57" Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -74,6 +74,10 @@ ------------------------------ * Added support for the Armv9-A, Armv9.1-A and Armv9.2-A architectures.
+* The compiler now recognises the "tune-cpu" function attribute to support +  the use of the -mtune frontend flag. This allows certain scheduling features +  and optimisations to be enabled independently of the architecture. If the +  "tune-cpu" attribute is absent, the compiler tunes according to "target-cpu". Changes to the ARM Backend -------------------------- Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -297,7 +297,8 @@ /// passed in feature string so that we can use initializer lists for /// subtarget initialization. AArch64Subtarget &initializeSubtargetDependencies(StringRef FS, - StringRef CPUString); + StringRef CPUString, + StringRef TuneCPUString); /// Initialize properties based on the selected processor family. void initializeProperties(); @@ -306,8 +307,8 @@ /// This constructor initializes the data members to match that /// of the specified triple.
AArch64Subtarget(const Triple &TT, const std::string &CPU, - const std::string &FS, const TargetMachine &TM, - bool LittleEndian, + const std::string &TuneCPU, const std::string &FS, + const TargetMachine &TM, bool LittleEndian, unsigned MinSVEVectorSizeInBitsOverride = 0, unsigned MaxSVEVectorSizeInBitsOverride = 0); Index: llvm/lib/Target/AArch64/AArch64Subtarget.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -50,15 +50,17 @@ static cl::opt UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen.")); -AArch64Subtarget & -AArch64Subtarget::initializeSubtargetDependencies(StringRef FS, - StringRef CPUString) { +AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies( + StringRef FS, StringRef CPUString, StringRef TuneCPUString) { // Determine default and user-specified characteristics if (CPUString.empty()) CPUString = "generic"; - ParseSubtargetFeatures(CPUString, /*TuneCPU*/ CPUString, FS); + if (TuneCPUString.empty()) + TuneCPUString = CPUString; + + ParseSubtargetFeatures(CPUString, TuneCPUString, FS); initializeProperties(); return *this; @@ -197,18 +199,20 @@ } AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, + const std::string &TuneCPU, const std::string &FS, const TargetMachine &TM, bool LittleEndian, unsigned MinSVEVectorSizeInBitsOverride, unsigned MaxSVEVectorSizeInBitsOverride) - : AArch64GenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), + : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS), ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()), CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()), IsLittle(LittleEndian), MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride), MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT), - FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS, CPU)), - TSInfo(), TLInfo(TM, *this) 
{ + FrameLowering(), + InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)), TSInfo(), + TLInfo(TM, *this) { if (AArch64::isX18ReservedByDefault(TT)) ReserveXRegister.set(18); Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -355,10 +355,13 @@ const AArch64Subtarget * AArch64TargetMachine::getSubtargetImpl(const Function &F) const { Attribute CPUAttr = F.getFnAttribute("target-cpu"); + Attribute TuneAttr = F.getFnAttribute("tune-cpu"); Attribute FSAttr = F.getFnAttribute("target-features"); std::string CPU = CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU; + std::string TuneCPU = + TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU; std::string FS = FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS; @@ -399,6 +402,7 @@ Key += "SVEMax"; Key += std::to_string(MaxSVEVectorSize); Key += CPU; + Key += TuneCPU; Key += FS; auto &I = SubtargetMap[Key]; @@ -407,8 +411,8 @@ // creation will depend on the TM and the code generation flags on the // function that reside in TargetOptions. 
resetTargetOptions(F); - I = std::make_unique(TargetTriple, CPU, FS, *this, - isLittle, MinSVEVectorSize, + I = std::make_unique(TargetTriple, CPU, TuneCPU, FS, + *this, isLittle, MinSVEVectorSize, MaxSVEVectorSize); } return I.get(); Index: llvm/unittests/Target/AArch64/InstSizes.cpp =================================================================== --- llvm/unittests/Target/AArch64/InstSizes.cpp +++ llvm/unittests/Target/AArch64/InstSizes.cpp @@ -29,6 +29,7 @@ std::unique_ptr createInstrInfo(TargetMachine *TM) { AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), + std::string(TM->getTargetCPU()), std::string(TM->getTargetFeatureString()), *TM, /* isLittle */ false); return std::make_unique(ST); Index: llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp =================================================================== --- llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp +++ llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp @@ -26,6 +26,7 @@ std::unique_ptr createInstrInfo(TargetMachine *TM) { AArch64Subtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()), + std::string(TM->getTargetCPU()), std::string(TM->getTargetFeatureString()), *TM, /* isLittle */ false); return std::make_unique(ST);