Index: include/llvm/CodeGen/CommandFlags.h
===================================================================
--- include/llvm/CodeGen/CommandFlags.h
+++ include/llvm/CodeGen/CommandFlags.h
@@ -16,9 +16,14 @@
 #ifndef LLVM_CODEGEN_COMMANDFLAGS_H
 #define LLVM_CODEGEN_COMMANDFLAGS_H
 
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
 #include "llvm/MC/MCTargetOptionsCommandFlags.h"
+#include "llvm/MC/SubtargetFeature.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Host.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetOptions.h"
 #include <string>
@@ -260,4 +265,54 @@
   return Options;
 }
 
+/// Resolve the -mcpu and -mattr command-line flags into the CPU name and
+/// subtarget feature string to hand to the target machine.
+///
+/// \returns a (CPU, Features) pair. With -mcpu=native the CPU is the
+/// autodetected host CPU name, and the detected host features are added ahead
+/// of any explicit -mattr entries.
+static inline std::pair<std::string, std::string> getCPUFeaturesStr() {
+  std::string CPUStr;
+
+  // If user asked for the 'native' CPU, autodetect here. If autodetection
+  // fails, this will set the CPU to an empty string which tells the target to
+  // pick a basic default.
+  if (MCPU == "native")
+    CPUStr = sys::getHostCPUName();
+  else
+    CPUStr = MCPU;
+
+  SubtargetFeatures Features;
+
+  // If user asked for the 'native' CPU, we need to autodetect features.
+  // This is necessary for x86 where the CPU might not support all the
+  // features the autodetected CPU name lists in the target. For example,
+  // not all Sandybridge processors support AVX.
+  if (MCPU == "native") {
+    StringMap<bool> HostFeatures;
+    if (sys::getHostCPUFeatures(HostFeatures))
+      for (auto &F : HostFeatures)
+        Features.AddFeature(F.first(), F.second);
+  }
+
+  for (unsigned i = 0; i != MAttrs.size(); ++i)
+    Features.AddFeature(MAttrs[i]);
+
+  return std::make_pair(CPUStr, Features.getString());
+}
+
+/// Stamp the given CPU and feature strings onto every function in \p M as the
+/// "target-cpu" and "target-features" attributes, replacing existing values.
+/// An empty \p CPU or \p Features leaves the corresponding attribute alone.
+static inline void overrideFunctionAttributes(StringRef CPU, StringRef Features,
+                                              Module &M) {
+  for (auto &F : M) {
+    if (!CPU.empty())
+      llvm::overrideFunctionAttribute("target-cpu", CPU, F);
+
+    if (!Features.empty())
+      llvm::overrideFunctionAttribute("target-features", Features, F);
+  }
+}
+
 #endif
Index: include/llvm/IR/Function.h
===================================================================
--- include/llvm/IR/Function.h
+++ include/llvm/IR/Function.h
@@ -584,6 +584,11 @@
   return F ? &F->getValueSymbolTable() : nullptr;
 }
 
+/// Set the string attribute \p Kind on function \p F to \p Value, first
+/// removing any existing function attribute of that kind so the new value
+/// replaces it.
+void overrideFunctionAttribute(StringRef Kind, StringRef Value, Function &F);
+
 } // End llvm namespace
 
 #endif
Index: lib/IR/Function.cpp
===================================================================
--- lib/IR/Function.cpp
+++ lib/IR/Function.cpp
@@ -975,3 +975,17 @@
   }
   setValueSubclassData(PDData);
 }
+
+void llvm::overrideFunctionAttribute(StringRef Kind, StringRef Value,
+                                     Function &F) {
+  auto &Ctx = F.getContext();
+  AttributeSet Attrs = F.getAttributes(), AttrsToRemove;
+
+  // Remove any existing attribute of this kind before adding the new value.
+  AttrsToRemove =
+      AttrsToRemove.addAttribute(Ctx, AttributeSet::FunctionIndex, Kind);
+  Attrs = Attrs.removeAttributes(Ctx, AttributeSet::FunctionIndex,
+                                 AttrsToRemove);
+  Attrs = Attrs.addAttribute(Ctx, AttributeSet::FunctionIndex, Kind, Value);
+  F.setAttributes(Attrs);
+}
Index: test/CodeGen/X86/llc-override-mcpu-mattr.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/llc-override-mcpu-mattr.ll
@@ -0,0 +1,16 @@
+; RUN: llc < %s -march x86-64 -mcpu=broadwell | FileCheck %s
+; RUN: llc < %s -march x86-64 -mattr=+avx2 | FileCheck %s
+
+; CHECK: vpsadbw %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
+
+define <4 x i64> @foo1(<4 x i64> %s1, <4 x i64> %s2) #0 {
+entry:
+  %0 = bitcast <4 x i64> %s1 to <32 x i8>
+  %1 = bitcast <4 x i64> %s2 to <32 x i8>
+  %2 = tail call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %0, <32 x i8> %1)
+  ret <4 x i64> %2
+}
+
+declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>)
+
+attributes #0 = { "target-cpu"="core2" "target-features"="+ssse3,+cx16,+sse4.2,+sse4.1,+sse,+sse2,+sse3,+avx,+popcnt" }
Index: test/Other/opt-override-mcpu-mattr.ll
===================================================================
--- /dev/null
+++ test/Other/opt-override-mcpu-mattr.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx2 -S | FileCheck %s
+
+; CHECK: attributes #0 = { nounwind readnone ssp uwtable "target-cpu"="broadwell" "target-features"="+avx2" "use-soft-float"="false" }
+
+define i32 @foo1() #0 {
+entry:
+  ret i32 0
+}
+
+attributes #0 = { nounwind readnone ssp uwtable "target-cpu"="core2" "target-features"="+ssse3,+cx16,+sse,+sse2,+sse3" "use-soft-float"="false" }
Index: test/Transforms/SLPVectorizer/X86/call.ll
===================================================================
--- test/Transforms/SLPVectorizer/X86/call.ll
+++ test/Transforms/SLPVectorizer/X86/call.ll
@@ -119,10 +119,10 @@
 }
 
 
-; CHECK: declare <2 x double> @llvm.sin.v2f64(<2 x double>) #0
-; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) #0
-; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) #0
-; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) #0
+; CHECK: declare <2 x double> @llvm.sin.v2f64(<2 x double>) [[ATTR0:#[0-9]+]]
+; CHECK: declare <2 x double> @llvm.cos.v2f64(<2 x double>) [[ATTR0]]
+; CHECK: declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) [[ATTR0]]
+; CHECK: declare <2 x double> @llvm.exp2.v2f64(<2 x double>) [[ATTR0]]
 
-; CHECK: attributes #0 = { nounwind readnone }
+; CHECK: attributes [[ATTR0]] = { nounwind readnone }
 
Index: tools/llc/llc.cpp
===================================================================
--- tools/llc/llc.cpp
+++ tools/llc/llc.cpp
@@ -248,32 +248,7 @@
     return 1;
   }
 
-  // Package up features to be passed to target/subtarget
-  std::string FeaturesStr;
-  if (!MAttrs.empty() || MCPU == "native") {
-    SubtargetFeatures Features;
-
-    // If user asked for the 'native' CPU, we need to autodetect features.
-    // This is necessary for x86 where the CPU might not support all the
-    // features the autodetected CPU name lists in the target. For example,
-    // not all Sandybridge processors support AVX.
-    if (MCPU == "native") {
-      StringMap<bool> HostFeatures;
-      if (sys::getHostCPUFeatures(HostFeatures))
-        for (auto &F : HostFeatures)
-          Features.AddFeature(F.first(), F.second);
-    }
-
-    for (unsigned i = 0; i != MAttrs.size(); ++i)
-      Features.AddFeature(MAttrs[i]);
-    FeaturesStr = Features.getString();
-  }
-
-  // If user asked for the 'native' CPU, autodetect here. If autodection fails,
-  // this will set the CPU to an empty string which tells the target to
-  // pick a basic default.
-  if (MCPU == "native")
-    MCPU = sys::getHostCPUName();
+  std::pair<std::string, std::string> CPUFeatures = getCPUFeaturesStr();
 
   CodeGenOpt::Level OLvl = CodeGenOpt::Default;
   switch (OptLevel) {
@@ -294,8 +269,10 @@
   Options.MCOptions.AsmVerbose = AsmVerbose;
 
   std::unique_ptr<TargetMachine> Target(
-      TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr,
-                                     Options, RelocModel, CMModel, OLvl));
+      TheTarget->createTargetMachine(TheTriple.getTriple(), CPUFeatures.first,
+                                     CPUFeatures.second, Options, RelocModel,
+                                     CMModel, OLvl));
+
   assert(Target && "Could not allocate target machine!");
 
   // If we don't have a module then just exit now. We do this down
@@ -329,6 +306,9 @@
   if (const DataLayout *DL = Target->getDataLayout())
     M->setDataLayout(*DL);
 
+  // Override function attributes.
+  overrideFunctionAttributes(CPUFeatures.first, CPUFeatures.second, *M);
+
   if (RelaxAll.getNumOccurrences() > 0 &&
       FileType != TargetMachine::CGFT_ObjectFile)
     errs() << argv[0]
Index: tools/opt/opt.cpp
===================================================================
--- tools/opt/opt.cpp
+++ tools/opt/opt.cpp
@@ -264,7 +264,8 @@
 }
 
 // Returns the TargetMachine instance or zero if no triple is provided.
-static TargetMachine* GetTargetMachine(Triple TheTriple) {
+static TargetMachine* GetTargetMachine(Triple TheTriple, StringRef CPUStr,
+                                       StringRef FeaturesStr) {
   std::string Error;
   const Target *TheTarget = TargetRegistry::lookupTarget(MArch, TheTriple,
                                                          Error);
@@ -273,32 +274,8 @@
     return nullptr;
   }
 
-  // Package up features to be passed to target/subtarget
-  std::string FeaturesStr;
-  if (MAttrs.size() || MCPU == "native") {
-    SubtargetFeatures Features;
-
-    // If user asked for the 'native' CPU, we need to autodetect features.
-    // This is necessary for x86 where the CPU might not support all the
-    // features the autodetected CPU name lists in the target. For example,
-    // not all Sandybridge processors support AVX.
-    if (MCPU == "native") {
-      StringMap<bool> HostFeatures;
-      if (sys::getHostCPUFeatures(HostFeatures))
-        for (auto &F : HostFeatures)
-          Features.AddFeature(F.first(), F.second);
-    }
-
-    for (unsigned i = 0; i != MAttrs.size(); ++i)
-      Features.AddFeature(MAttrs[i]);
-    FeaturesStr = Features.getString();
-  }
-
-  if (MCPU == "native")
-    MCPU = sys::getHostCPUName();
-
   return TheTarget->createTargetMachine(TheTriple.getTriple(),
-                                        MCPU, FeaturesStr,
+                                        CPUStr, FeaturesStr,
                                         InitTargetOptionsFromCodeGenFlags(),
                                         RelocModel, CMModel,
                                         GetCodeGenOptLevel());
@@ -407,11 +384,19 @@
   }
 
   Triple ModuleTriple(M->getTargetTriple());
+  std::pair<std::string, std::string> CPUFeatures;
   TargetMachine *Machine = nullptr;
-  if (ModuleTriple.getArch())
-    Machine = GetTargetMachine(ModuleTriple);
+  if (ModuleTriple.getArch()) {
+    CPUFeatures = getCPUFeaturesStr();
+    Machine = GetTargetMachine(ModuleTriple, CPUFeatures.first,
+                               CPUFeatures.second);
+  }
+
   std::unique_ptr<TargetMachine> TM(Machine);
 
+  // Override function attributes.
+  overrideFunctionAttributes(CPUFeatures.first, CPUFeatures.second, *M);
+
   // If the output is set to be emitted to standard out, and standard out is a
   // console, print out a warning message and refuse to do it.  We don't
   // impress anyone by spewing tons of binary goo to a terminal.