diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -226,7 +226,6 @@ bool HasStrictFP; unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth; - unsigned short SimdDefaultAlign; std::string DataLayoutString; const char *UserLabelPrefix; const char *MCountName; @@ -795,10 +794,6 @@ /// Return the maximum vector alignment supported for the given target. unsigned getMaxVectorAlign() const { return MaxVectorAlign; } - /// Return default simd alignment for the given target. Generally, this - /// value is type-specific, but this alignment can be used for most of the - /// types for the given target. - unsigned getSimdDefaultAlign() const { return SimdDefaultAlign; } unsigned getMaxOpenCLWorkGroupSize() const { return MaxOpenCLWorkGroupSize; } diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -79,6 +79,7 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/Support/Capacity.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" @@ -93,6 +94,7 @@ #include #include #include +#include #include #include #include @@ -2463,7 +2465,16 @@ } unsigned ASTContext::getOpenMPDefaultSimdAlign(QualType T) const { - unsigned SimdAlign = getTargetInfo().getSimdDefaultAlign(); + const std::vector &TargetFeatures = + Target->getTargetOpts().Features; + std::string TargetFeaturesString = std::accumulate( + TargetFeatures.cbegin(), TargetFeatures.cend(), std::string(), + [](const std::string &s1, const std::string &s2) { + return s1.empty() ? 
s2 : s1 + "," + s2; + }); + unsigned SimdAlign = llvm::OpenMPIRBuilder ::getSimdDefaultAlignment( + getTargetInfo().getTriple().str(), Target->getTargetOpts().CPU, + TargetFeaturesString); return SimdAlign; } diff --git a/clang/lib/Basic/TargetInfo.cpp b/clang/lib/Basic/TargetInfo.cpp --- a/clang/lib/Basic/TargetInfo.cpp +++ b/clang/lib/Basic/TargetInfo.cpp @@ -119,7 +119,6 @@ MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 0; MaxVectorAlign = 0; MaxTLSAlign = 0; - SimdDefaultAlign = 0; SizeType = UnsignedLong; PtrDiffType = SignedLong; IntMaxType = SignedLongLong; diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -87,7 +87,6 @@ PPCTargetInfo(const llvm::Triple &Triple, const TargetOptions &) : TargetInfo(Triple) { SuitableAlign = 128; - SimdDefaultAlign = 128; LongDoubleWidth = LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::PPCDoubleDouble(); HasStrictFP = true; diff --git a/clang/lib/Basic/Targets/WebAssembly.h b/clang/lib/Basic/Targets/WebAssembly.h --- a/clang/lib/Basic/Targets/WebAssembly.h +++ b/clang/lib/Basic/Targets/WebAssembly.h @@ -49,7 +49,6 @@ SuitableAlign = 128; LargeArrayMinWidth = 128; LargeArrayAlign = 128; - SimdDefaultAlign = 128; SigAtomicType = SignedLong; LongDoubleWidth = LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -399,9 +399,6 @@ return false; } - SimdDefaultAlign = - hasFeature("avx512f") ? 512 : hasFeature("avx") ? 256 : 128; - // FIXME: We should allow long double type on 32-bits to match with GCC. // This requires backend to be able to lower f80 without x87 first. 
if (!HasX87 && LongDoubleFormat == &llvm::APFloat::x87DoubleExtended()) diff --git a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp --- a/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp +++ b/lldb/source/Plugins/ExpressionParser/Clang/ClangExpressionParser.cpp @@ -500,8 +500,6 @@ auto target_info = TargetInfo::CreateTargetInfo( m_compiler->getDiagnostics(), m_compiler->getInvocation().TargetOpts); if (log) { - LLDB_LOGF(log, "Using SIMD alignment: %d", - target_info->getSimdDefaultAlign()); LLDB_LOGF(log, "Target datalayout string: '%s'", target_info->getDataLayoutString()); LLDB_LOGF(log, "Target ABI: '%s'", target_info->getABI().str().c_str()); diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -502,6 +502,14 @@ ArrayRef Loops, InsertPointTy ComputeIP); + /// Get the default SIMD alignment value for the given target + /// + /// \param Triple String which describes target triple + /// \param CPU String which describes target CPU + /// \param Features String which describes extra CPU features + static unsigned getSimdDefaultAlignment(const std::string &Triple, + StringRef CPU, StringRef Features); + private: /// Modifies the canonical loop to be a statically-scheduled workshare loop. 
/// diff --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h --- a/llvm/include/llvm/Target/TargetMachine.h +++ b/llvm/include/llvm/Target/TargetMachine.h @@ -108,6 +108,9 @@ std::unique_ptr MII; std::unique_ptr STI; + /// Default SIMD alignment for this target; stays 0 unless the target sets it + unsigned SimdDefaultAlignment = 0; + unsigned RequireStructuredCFG : 1; unsigned O0WantsFastISel : 1; @@ -204,6 +207,9 @@ return DL.getPointerSize(DL.getAllocaAddrSpace()); } + /// Return default SIMD alignment + unsigned getSimdDefaultAlignment() const { return SimdDefaultAlignment; } + /// Reset the target options based on the function's attributes. // FIXME: Remove TargetOptions that affect per-function code generation // from TargetMachine. diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -255,6 +255,50 @@ NewBr->setDebugLoc(DL); } +/// Create the TargetMachine object to query the backend for optimization +/// preferences. +/// +/// Ideally, this would be passed from the front-end to the OpenMPBuilder, but +/// e.g. Clang does not pass it to its CodeGen layer and creates it only when +/// needed for the LLVM pass pipeline. We use some default options to avoid +/// having to pass too many settings from the frontend that probably do not +/// matter. +/// +/// Currently, TargetMachine is only used sometimes by the unrollLoopPartial +/// and getSimdDefaultAlignment methods. If we are going to use TargetMachine +/// for more purposes, especially those that are sensitive to TargetOptions, +/// RelocModel and CodeModel, it might become worth requiring front-ends to +/// pass on their TargetMachine, or at least cache it between methods. 
+/// Note that while frontends such as Clang have just a single main +/// TargetMachine per translation unit, "target-cpu" and "target-features" +/// that determine the TargetMachine are per-function and can be overridden +/// using __attribute__((target("OPTIONS"))). +static std::unique_ptr +createTargetMachine(const std::string &Triple, StringRef CPU, + StringRef Features, CodeGenOpt::Level OptLevel) { + std::string Error; + const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); + if (!TheTarget) + return {}; + + llvm::TargetOptions Options; + return std::unique_ptr(TheTarget->createTargetMachine( + Triple, CPU, Features, Options, /*RelocModel=*/std::nullopt, + /*CodeModel=*/std::nullopt, OptLevel)); +} + +/// Create the TargetMachine object to query the backend for optimization +/// preferences. +static std::unique_ptr +createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) { + Module *M = F->getParent(); + + StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString(); + StringRef Features = F->getFnAttribute("target-features").getValueAsString(); + const std::string &Triple = M->getTargetTriple(); + return createTargetMachine(Triple, CPU, Features, OptLevel); +} + void llvm::spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch) { assert(New->getFirstInsertionPt() == New->begin() && @@ -3039,6 +3083,18 @@ Builder.CreateBr(NewBlocks.front()); } +unsigned OpenMPIRBuilder::getSimdDefaultAlignment(const std::string &Triple, + StringRef CPU, + StringRef Features) { + std::unique_ptr TgtInfo( + createTargetMachine(Triple, CPU, Features, CodeGenOpt::Default)); + if (!TgtInfo) { + return 0; + } + + return TgtInfo->getSimdDefaultAlignment(); +} + void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop, MapVector AlignedVars, Value *IfCond, OrderKind Order, @@ -3141,42 +3197,6 @@ addLoopMetadata(CanonicalLoop, LoopMDList); } -/// Create the TargetMachine object to query the backend for optimization -/// 
preferences. -/// -/// Ideally, this would be passed from the front-end to the OpenMPBuilder, but -/// e.g. Clang does not pass it to its CodeGen layer and creates it only when -/// needed for the LLVM pass pipline. We use some default options to avoid -/// having to pass too many settings from the frontend that probably do not -/// matter. -/// -/// Currently, TargetMachine is only used sometimes by the unrollLoopPartial -/// method. If we are going to use TargetMachine for more purposes, especially -/// those that are sensitive to TargetOptions, RelocModel and CodeModel, it -/// might become be worth requiring front-ends to pass on their TargetMachine, -/// or at least cache it between methods. Note that while fontends such as Clang -/// have just a single main TargetMachine per translation unit, "target-cpu" and -/// "target-features" that determine the TargetMachine are per-function and can -/// be overrided using __attribute__((target("OPTIONS"))). -static std::unique_ptr -createTargetMachine(Function *F, CodeGenOpt::Level OptLevel) { - Module *M = F->getParent(); - - StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString(); - StringRef Features = F->getFnAttribute("target-features").getValueAsString(); - const std::string &Triple = M->getTargetTriple(); - - std::string Error; - const llvm::Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); - if (!TheTarget) - return {}; - - llvm::TargetOptions Options; - return std::unique_ptr(TheTarget->createTargetMachine( - Triple, CPU, Features, Options, /*RelocModel=*/std::nullopt, - /*CodeModel=*/std::nullopt, OptLevel)); -} - /// Heuristically determine the best-performant unroll factor for \p CLI. This /// depends on the target processor. We are re-using the same heuristics as the /// LoopUnrollPass. 
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp --- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -333,6 +333,7 @@ TLOF(createTLOF(getTargetTriple())), TargetABI(computeTargetABI(TT, Options)), Endianness(isLittleEndianTriple(TT) ? Endian::LITTLE : Endian::BIG) { + SimdDefaultAlignment = 128; initAsmInfo(); } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -139,6 +139,7 @@ this->Options.FunctionSections = true; this->Options.DataSections = true; this->Options.UniqueSectionNames = true; + SimdDefaultAlignment = 128; initAsmInfo(); diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -245,6 +245,13 @@ setSupportsDebugEntryValues(true); initAsmInfo(); + + if (FS.contains("+avx512f")) + SimdDefaultAlignment = 512; + else if (FS.contains("+avx")) + SimdDefaultAlignment = 256; + else + SimdDefaultAlignment = 128; } X86TargetMachine::~X86TargetMachine() = default;